From 0b82f146b5edf44724bb76a3a523e2dc7f2224a2 Mon Sep 17 00:00:00 2001 From: Matthew Exon Date: Wed, 3 Apr 2013 20:13:08 +0800 Subject: [PATCH] Added retriever plugin --- retriever.tgz | Bin 0 -> 12163 bytes retriever/database.sql | 35 ++ retriever/retriever.php | 738 +++++++++++++++++++++++++ retriever/view/extract.tpl | 36 ++ retriever/view/help.tpl | 148 +++++ retriever/view/rule-config.tpl | 111 ++++ retriever/view/settings.tpl | 17 + retriever/view/smarty3/extract.tpl | 41 ++ retriever/view/smarty3/help.tpl | 153 +++++ retriever/view/smarty3/rule-config.tpl | 116 ++++ retriever/view/smarty3/settings.tpl | 22 + 11 files changed, 1417 insertions(+) create mode 100644 retriever.tgz create mode 100644 retriever/database.sql create mode 100644 retriever/retriever.php create mode 100644 retriever/view/extract.tpl create mode 100644 retriever/view/help.tpl create mode 100644 retriever/view/rule-config.tpl create mode 100644 retriever/view/settings.tpl create mode 100644 retriever/view/smarty3/extract.tpl create mode 100644 retriever/view/smarty3/help.tpl create mode 100644 retriever/view/smarty3/rule-config.tpl create mode 100644 retriever/view/smarty3/settings.tpl diff --git a/retriever.tgz b/retriever.tgz new file mode 100644 index 0000000000000000000000000000000000000000..3155099ed893a7fbaaaa04ebeaf1048c19cb226d GIT binary patch literal 12163 zcma)iQ*b5>ux#v2vaxO3Hon;0@QZD1Y+D=Kwr$(CZQTD>-CK1Y&(qY*OV7*nOiwjo zBn(I&o~{xI_!YmE+XhGdm**3$B0)U}N-W;S*#S>CBKm%W3 zL*J|K2OU#KPe;#dN&jzK^D^*i$%X0`a^uyTs%V8Pq6%3NTl;R%QStKJvfpbi_Hf5H zkrpt#(`zw`aOxy5}| zzmP@v++lt5ikuf8B|e4F$y4F7W)+2ONsX63p-xvTa~oRy88nBpwHselR;)wa=*B@; zVAjFTs$hFh9ZkNrto|Npj70MHSeFuWX}MdIOtcGdQyY(xa zyw->JMz2W_C7?L}N^;sgub7&s1tNoRQI)88LZ9_NR4B*i@;p??gW40ozF#~#j}Kqj zVDd%BT(AGJ@k&pDV}p^9cbt)uobQSI&^@1b)@)}ra%Fa&unm}*53pOA=U!+#;zj}_ zIL*?=SX9!vcLMGP;!-<)+(sNZXq)uToYA>54e4N2rq&U=4ywPumW~>F(#(P1%pTs{?a|Vy4R+EEIzD-^4mS3hm zUptzAbgF+89BlqV7wB2=d#M}Ftmd&hbF9@FKs*nFj@{xwV29~=cU_2e&EgN)?%LIJ 
z`kz3y&=GK19|nm|IDJ`R=++%1CN9h4A*`V6ddg7*hOBaS?GE z-camRE7OhsA|M%05?>Bwe(bc2{+r*1&hwWuVsD5Drn=&I6!%5%hs4oHlkGiT%dFUR z!+(b+okWueGP_3a!!?A1K|&Z!u#y-tzmVM>Qnr^VEXxB|+6tvI0cw!MjTr~R!JgUq z^Dgts93-g`4eGnIwhm5+)Sgz!ZWn=m*-)eUME5c4Risu&wP_0(yTsE$_=dq!j@1}J zI!V2f2?At#Bp!2Be(4iq9o1euyaFVpVB1fvtQ1`seaspHq~ZsZQT)!!AJ^A9)A^;N zM&S+Y>-hB)gblywMiX6qy*tFIuy18Yxw%B(<5&lwynO5nqHiTi zf=dX`&a`wsbe@cJw6E>)*}wTFIuCA#0Y!1H&`0b}fkZsZ?3{-Wt5?Xdg&W)lzTm$# zR^&w>9BxxQ8`ej~t3cb&;WLo&?Fsq)e~2IB9OtEYo0|GANA1Oral68Q{f$08Cxdv= z6=)osfbF6cH)0g7!$HE67$7Fkhzji^CDu2$k5a~?f2l0XT=F6pS7&%XZrUj;-;?#s zUVGlq!&nQlE&Rz}!3#6i|M4_18fKM#6F`0kAH3L?gUl`J7dJwQx%pe>9?}n@y}+)_ z^qp80^91&uPtvFjn2$?S}xC;SVUh5?qAA0ym_jq zG192E*)bD_(x$=p55&VSo1b^s?V5)ID2ksqsLnC!b3;7M$SPed?4E_?Oyf&Bl?B|Y zCo1`!xjSwmkdzkOHpRnetXhdyN~PJoc8QSLQ|@l_sbPs8`eq-Jg5RtB-K$&NTVqzd znh@=x=K)vtF2Yqfp~jRN(BYC#)04kf1LaTuoxI5-TQeaD^RJxO&{Y1$!o^f=wrt#P zDW5WHda1%VHGh*hhaJtR?kW_NL0>!+%W7D9F#32&zUM8ho_)E=Z7{aLLXJ%2lr=$!`IP*bI&a9H*3*y&yF8`~5EWm3|HLk5;eOfMF0@w4% ztZ4Aj7a9EsjZekkNb5Dgde4>DkOMA%`QdT=IOEKXo^vnVc-0z7Ht}>02nu-tvjuDV zwi4b|fOIaYGo*7C0w+Uqzo||Oldm)^_xhA77ytTj znzZ?sjvb}wN)g|utI82E{?RSK-Jr8`XH=7{C<%T;HC=dBt@vJ^e0>}LQ1ksPXnR=i z_||nvzI^|rB44v!TcT}fd;bxgxgO|+GW6@L_Kj_-pv3^Z$Z`{E(>Jf%vT_<>JZxnT zkW9Fx2@>dwJVfDZx}~<-K;IMyGeOZJ*}fIxg)SGJunmRCS!O=`ISdOc2jN{?h!;0n zp$G=G*Ht=%OX|t}f=AZ4vTJ%h@eADj9`zmHe(c(vzfZ%c+(eMs*?azlLZ_AUy0g^I z?fh3)$ue;Txl=6p8O-IJLXj=ABEV*AO5ew3`X1zG*`fNRoC(5~Fs*4)9yj3;EGgVU zRTWtn7BYCbh!~1L-G;f%R1GT`2gCiF@8AQZyKpOBWUm4J<+HLXZIzbj{Q%Zf%_)!Yo_?|%=WK2dK3A4@XL^r;cZ>HlJM$W^b8Bs@YM;0J7Da}Ac~PVi6}c@D zY}N{)l=v zIq3;Hm&K~m%NPr2cb1wN{eUrNe$$-V!T1z5+Od%As?Hx-PPUnx3mjeVIPRzofA|0f zgyFpW*>!(4Q_LgoQ}22s3>~>HmmM9>OcZBAOZr-8NI6YbR(m73if1(Y(P}+2 z_uH9chI9S4gwy9n??^R;Jh_}NmznMGTEXMgov&8u5Xlh4Si&R_M}Kgk%SGfqY1PKJ zd+$FGdv_w0ynFLaJd}uV;+QU(D|JBC1H@y$2y|IK5I;b=A_MZkWixi{j2gp2ULyH+ z$oO$2v;|5V=!O!(+y_eJf6H_yc*a_GrfGVNo-Je4QpweMhdW!k%EZqs%kCTj9Z=v) zEhM6^E)VTzx|f#ZMP(vg&g%5x0NEp4p*^~=@C5gha4W(e7gg`_E)h63+~3o-MDOqIHT&qKG|-QANBID_V8kZqrVCd*MEZ 
z#(L=u>bYdP?zd~YO<2o>f=mnlf>>O=2Nz;;nNZV^!uI~Y;!Qan(9be&{?YZqd0Ge>TOul+M_W8H>_=&1Ntz^hi5vZj!W^o=XE@&6hT3NS@4zg0D{6Fn zX;jF%f8A?FQelyaZi1>FM6AP)&auARYa`We9Y3XGTU|ria@vRSFp29rX3&zMC4x$U zi7SAFXC@MEv9QFK4mNfcd>I&;kmQ%l#FOG0#fBIO)F!F$R~u*n_nzRazaA89N_z);#YF(&M) zG#?AtHa=M)t?pk79~I!^5m;SgoRvMQ4GPfu49h-BTBLbMNI61NJ$v>Km!qTBSLlMxsftstNRh~Wq22zkTgus5HGxv)KHyl#{qJRPY&~}i&M-AaH#2OQD%x8 zPl0dF9lSfh4Mty&WJvF3SQ=Il5ZNjiq2Q6$J10dxZfsO?FCquKmh}42x}^t)(|bD zGR=xDX_q2r4sVhVmv*Kud?D3>BT{k#;_hCzLBC~1RVy#Qxx(tjN-j1&gF~(_x_5}n z>Bz_*>g5!vghLBAbIxX`$lT_9Pf8gT>0}jAV zKBev&J@40Bw#jm_E&>*{FNJ3aWdrOjNqk_p30y5FRPZBa2BRVE0)2Fuk(;nMB{b9J<+A2FPJsp+>jqExF}@V5UsGzZud5N*JNJoT-u(`R63A>K>Z>%X05a|K}1#U z4A2r=x=Q?9NO0jK&H;S}oFh%%2RrkonhR=mn}j5w7vU72%bTXVjOPHG%2dxl_!QSh z&#`i%Nr2lVUyca?W@kK^kJE1K$SS{23gY`RrAA4@;bsr>h6~WdGT99R;hA+oS z&kri%fw92Y)mfzIhP+t&%w!J3vSe5F>75oHT>my_C02H8C_hTVmEzXZJjy7NzES+2M^=B~ z*NV+Tp=ilh3G+QJGcY==1cIel?R+bUTUCWmA(Lxawx>Uv8uHrIPic|)A%TaQSqZP$ z1>R=t>j61<+GgYP+jb=vMlbDHNguvmb{^27g7x%&p?ySifk?xvifv2dOWI>g;BfV7 zd8Z<5KudHI#3q!esq9JWnRqy6MB_i&>&&&8c1S{Z2-`)eR9jYaY-mPLxSA$|Y6-yQ z?-vs%4b5>X`JG%cc6N`r;%w*}tO;fjTxvubbB|%joqzK*mCM+key`puM?h&Xxp_b{ zqxr!Aa?xq5mQ#K+6nO_~ndR362_wT>X~kPp;KPyyO#&-S4Xn;TKq&|_CzR|(K}AHY zSUiZ7iZQD5M&CdhO{JVcMXk~p0*e8>g@whivUh{9WwBozrZfv+wTV%s%B!RmH@VVc zW13pjs>*?=F&eu)6Ebl9_xbx0hi3RhmfvtlJ0Zd|;rT>>ix`}ZT0aOwZuxPW>SqEWFat~)5 z#YhT2ML;3_|vQ6D)p3Rn$c|G4R}WwW&Lhpm+(r> z+w~*6jMrz0dM$U6blESHeP~6s21H{E#>!*%hee6DURAz=r|(Jp=N-3lGd{tmg3r5e zJi62y%bS_)mfBvAiRh4!$9SAoLQ!Tzb#kxBk0$Jmtcv(1UDEpK$UM~NtU(EFejN?k ztg7r2G=ID^jy_%Nk4-mPYZnWfI}hO^ffoOa3_!h`IeT8vWIDj{i_+JEfd@`ue899` zoz&}iYWiDR!-;K!PXAyYTu_ysY0Y}Rq<`jEPuf4G)B3yoy3e z)M~+}25OQw{073AVVY%{HNoaZU_)aTL7rSvJIRM?P+<1y&UO_oXY8IOs3;WYI2Y22 zyXwMi?`s#59v|bWNDswrTT!BSC9INH>%M{fk;x8wjCoD&ZD1VqF=)RbAvjD)KBUG- zvhf~Sw@$1ohHs(irtS#+`^7$K*ObK=`Abe~?P;-)4Npu2toy#wXSyE{Uwj1{=U(lL zs%uOy7`8==nG{&=Ux8b6gyWLMqDZ?lG|QMhj!5fSWTwX-d6|Y%ilxA9!%q*HfmDv? z59R(l=Bs>c;qNaCO$;i>(!`vg^Kjk9kOy=6*o_qNBjOWJ`ki?~*Rf#~^(! 
z!Y!(p@?(PBgXii7vxDJ1J^c>i#$Y+h^FxKfJyV>GR>4T|Ov29H`o)U)MVXzQc8+%t z^W@!Jp*Iq#p51>UUKUgS2hY%T&F2b_<=RB-`uwV}T1_8yC$nWc)%9Q^p~%U;lTL;0p!LYB#C38x*2OH*{~q z0~(Sp-d3jd`jNa5REuXIBBVH8vwd#=mR2xffqi2W<^gLh#>3uu914^_o(8BpnLK3D zP1|ylA`X_;5vU$|s&w%C+2^IX#zhnKEcM`%;1v zxz^a^k+t}FH{lfP)J=4ejf{C3pcUp%MEhDxS-#RIzZ_iQ_4!gxhKn=g@X?)Orl8X? zl}C&f7)L}u`(R^x%m)v{RaimYy*^zpv=dTIaS1tEGf63b3H<`cT8k726?E&Dhy*HnY`E8o6#hG%U^bWb^ z`gT?)B(nwRkP}f!FWo&{V$c?J7vH*PmaSO;pjyUS$tbK?sgAt@rP6)FNoZhc3I2|G zSS?>WF#6UH6{~Plp$eFllex!hOf$A3Q>0aDS^l%O3o?rUr4BraDrDa zjG<@ZRpsn}EBedbi<-%ucrG*3(_4`A8@2H}t<7t{G+$?JR!mSropvfF;{T_V=+h9i ze$zPF1UMlM*I9m)vX$BlH?MDeu1Q+ zKif^Ki-U~y-bt*H=HFfLt_9IK0G#uQ5QYb2>4o9rch;t4%P*Dm{1HRp=y~c0oQEfku?IS>LkELt-V3 z68r;1E_Fh%dcw^h%>zRl1&J!yyISuasUF(%)>LSEsBh7v9^{v_uB)c@g7{-2qOWuG zBTh(#BQVBoaWg|)XU|kDU25v|`{qhDQhtt~GYK+WHWYzh%^*(m)Q4vf6s1R~ny$1? zcs2nd1vE*`g5qpKRZ4&D1VejB)-Ptr5-t=X=F8{e8&(=J+w>8*6%k|k1rzuA`LUXd zlQd8MK(wdF(o|u5&|@9kseKVai8~ndDNLB%sqskj z+?W%T$lp_kcH3AS!xPOY*&~!&o2mW*;6Oaax5=UzW^upn5Za7gc~x3aZs+mZbke$- z08(!E%F&LgD-hyF7mgodl?azxFz7TJqw3S+nlA9_()|lC`klqa7jnVccmeg(VT@21 zv3^cxaa`B4C|AGrq1>GJrlpcD9=-sFR{8r)OYN%3H6P9n8P!rmPHeB^Um6RPgXclA zU;TOA5F=7vajrIBI{Pq1d%iIH|)uZpR++yJ* zFSW!xDSBR$#4-hMcWW%vdTn zs#yo~4%FOXj-?){TY;wcNRlk9MC!&YG8J>-G?SF#Tc2XxeDS*m8%S0!VSX4$XdiAe zNSDbTvieg+CvYan7D~GQ&_2|>7gU7lp*kBC1Kp4iVzL_4O*zEWxK@~`A92r{ppfTm0RFbpX z0*HpTKM!e>C~+x2{Zda>@`&syovj6lHBE1A_s5iP*e*M_(TtNGC>IP>M?kCf{e5IF-f}1@c_3BSLm{hDIRyfX_6`_X%5oTw5f8 z?LK2`W9lg@F|TxYH!FI7!8v^~3kp~%kGj*MdWzDFu^oBf%(xesnj%ikE|MCCEMt3s z`7k{8)J$r4DjmFYKST?@o$#^b+Q-l$Eq)X3N}G*bO>OUzZOb1UYo_-X@}HKj=YStt zhUWI>_UH1hcSjqWER zPzs6WU>ns27?b+u1O+(tKL)*)Q|8I3aU75!!qk$co_(jbI$;1$p=tk+N*w>v=S=-g zX75d(vah!<*1%tTW+S`DsLhftIy7uPl>SM^s!)$-zds*gF{=+{YhPZ9#L&besz6{y zT0qD7uY9cku9=gKjL|X-&Vt%s|7vp~C15wzlkkULX7^n*x1Kl;5Hc?UbQQGu9wH}#zQ9%#d%?y)O0hPY-YsF5}#|}9tdi7i~dbix48Y$WV0%1M( z(Z;&GvS3DgN7C>H;lv$}9eKvG!ItB0HetR`Mi&~v*pvqfgo!0^I)7#c1K9 zXG02p9Ymq+>aj@IQ=4L{*8Rht{vML@SnXF0hWUh5HEqlHgng-`jq&|^Py6<^=1kQO 
zF5|L3{mM^CYcrJ9Mj88SiZ$sos(qh2{I+Vd#0L6%5N=CG;b-jV9f7}U3^pCPJXZv% zD`wPCoANfk_Ba-;m$zd-RePm~6~uBoPGC9(N7vCYQ5aWCsPM8O(_yYh!cLVFb{n(L z$yJMiZr>F2%Q>Q;$qKFs2c`oh{s#0vm;!A- z?>9JrdTG=bR}icN`~gpE1?pGWGCzXi(UvE8B^aE@Og__{K38{P_Wqv0BkY=tAdKIA zv+-hzAB?@4A5d=8E9fqgE=Fmo{%rKkFZgIKG3i!`rAf1wn|I`? zgiweb(FX?SHLE6u1S{?tVX8qlIUJcr1&0G&nqTKaEd+Uhwax1+6vx$0k0oi1v^L_l z56Aal(J&ExlXUk+-Fno`y(eUF5T9_g-r#+CPH0R<%Xg`_!%T!byB^(_LI`?BLL>&+ zAsiI}FyM{fcp+yj;Zs0GQV`sObU&S#ZFfyO(dR}th~|}%RkTdkd>y_E_aW(rtk8#|d!dvT278 zH($L9X?$x9!vya1^S*Z*$()05@f$w&y+2#s!5Y#f$q)@Llf;99ndp!I z8C{lA3?Znb6xoQM3}h7kjmz|HtYDcihh@VchP zVW|O=Qi|-g{Af=3TEVb~Bk@XYQ}Q;zFnn}U9XOuRFIC;)HX+d@J0*$UAz^VWK`vJG zHrTJTd?t9qb|gA9=DdSiwmKV?U8d*V*c02ESMTlsiK%k=h8>fTt8FY7pcv_YG$biu zQvpeCdZ$`fIUehQlSE~e>)qES*u+Q5&bl{MSWh)i!UvLvpN4zNbbJq)uIHGdi+rmU zM)*W#XzRdd8dr3=Oy%6#zixd$7+ur#szWsjGY|uM-){%AXYIWeTO^7b`D^*M543h3 ziOO@;c6MZh{#%#&6!8OL(ZbKwg437q>A_E8oPVGpT^Qp13`8RPAnMv45c2wKm)F|| zYcsz#*47|k&jc=Ov4sm}7Jus6o*3RH?laZ9ftSPrF4>D1lq`p6(M41f<@*{D6$ZMC zbb~sao$(ngfv&3RYucpONAPv|LdIeNzq1 zl#bBjA$qspU0+>a-Ea|6F=KL=8lG1m0+lA?5mTS0?l&@}TD@r4Wi}j^V_L+U-vR{L z1J*GZ0DPz2;e)Mo`w?3$25z~qiOKO+ikZ&>6TxUr9w&=5Zvvd}tf1-M zLRATOKgB>V&n6CK=v2n|j|dtgN$qdimX!8({5@rtR0s}A!p&r(h!k6GV`%qruy}aP zGamlIB}T*>n%d>OQAvjoDwS)tXQ@d`Wu03lF#FccI}v%EoR7 z!6*!`hWljNkf5z@84sbb=0R$RFp4KATY7by?x&<1Q(j0dV4l;THLK#cl8|Ys6kjsZ zHf6ZqIGpSGBUE>!ew1_7yIfqK6B7|1>6fyP#1V99sw2!fosH8n1vh3< zd8{shbq0_5Av`_&23fKN9(V5RXv=8EFl(BuZfwCF2^6@=HJP^$OObU!LUVjR0ngtw zM1tZBcwi_7S~oX>Q79|4L#8Iq{Cg?3XfS5qv82 zMpt_PEKC}T0i`lG7i?S2=$lDFZK0D5sX$d6=h0&A;t)a^sXb)&PzY@T53Iu`ZN8dc z-ltN4A0Is}FBMz+)c8M;f>S!(bvP?kTbl%U{J%gvJ?+x^`zG<<*+pQ!*h+CQm46=Ll4Tp^qHB1yEdun-vlHZ*>93p?z!ELUiKPa4-Y_ z0YxaeMyyX_bH5}`GA$KQ_G(7dth5l01Pq7N$9NMT zC2_Z`j|*~?3oQQMY0PT8@NN+fQkhU1o3ga^84M1jQq;zf5)ZYQ9r}PtkO``=16CZ( z6XUjepXyS#O8rXv4VcVJ4k17 zq7H-9i!!O>&Qr)@l<1&vu?hX<^vDj(e#eX!=v+-g$IyHO;}TUem2j!j9bzJ<1?Apk zJOogVyCj(YYUDGFr0ih+n+_TYX@Z||=%+=Z?+ zT5&}83^bAJ)Fh6$$T^ULZr5BlC(1SHf}9D>MG4 
zs;)ai<@~{K*rl2cnJLi~;;``OUrKGvu|q@BmE8K{od}lYP`DEQou7Y@+dM5^x=-8- z`8x&Q@5lI@I&O|JqQuWCO$TFuzchw8!6Dj4^i{%iGpuf@z8O_gWW5Z3me?H`ZbiEY zMIWI}`pF?0NIPOhf0uZ>dtk1@1r!d=-QqF9B5%rQ_w-fJ@%K?^XG}0Bf4p*%Q}D5B zBCQC^5-c6882Hs6@~;Ym{kg_fjj?9N{^;4)QOi1ZVL0h4=XrRdN%wPRLZgB)*Zqfm?0Rn;B@3|1)2JVa;IRhrsulNdTC3Wnhem=d zSa|dtQomv=&Lbt->Zxqj27d=f7M_@K0pDzOiJh_l!WkpvXtgP>B=k8A(Vf~vp*==W zm0Np0I~NqD4OP#Jchbh?iAYg@hfMDXe;Ll9JYz8IK~V~s$ZKLaIp@s(wm+FpvW6;8 zUU%VslQq)v@fql8tkWZijFo4c&lvgDfOa>MqPYm+vg2RPoD!s5->Erhs;V>apKd_J zk0*gI6%**s6W%u?aG3BEV_w6@tUo99*t^mIX=V}t>WJWy2HWDWYqkR^prOSBkny*? zO3ZMR2&dLXTYN(++cG1vZxNTAGLoi#ArZyXHwHV>4|CI1F15>h42pdN#vN{t1|O-* z2cbn!+>1087)>&d8Aa`J=q@HuK{drnIyn78&W>tU;YTOsjJhJs(!AW(*(ekip+@L6 z7M+a%?=YVIJwehLsu_^!i*LM}Pdb9^AX;r>UxG5cu{PYSN;x)o1o~52Q8G@}bs7e6 zie^byFP4a8FQV@dQnN^wFV~K#ucgJX)NoKc0Y&!jtCmqI=V;3kXOzl0{-D%342^el z7qqA2?HZxn30Y^Sv$TVkIIJwGAjND+q7IdNgYmLp1Y;%6fn)#M z%A^Zc+<%BfF`!ab1O{Gb5QaT&TH6=2d4;9T48*YSFATiUj)+TY5|1M5F^LlF?89%y zhHmBmd2Pl;!kutE$SYX3rQ?&hlEle>#v?5pKBcj<=b%(BO8!lYtt1F4Th?i%sCrwl zsn!qr#U`$dtzfV+L+{cpO@sg*C-0VBj9%I^w=0ZxzYO2P9y>z3M5(Wk2~*QPCo-il%=1a}}*| z;qkNc&hm3%QEl-psJl=!*TxVfFMvd$nLYTp`Kn5Bs_uFXRJTC}Nl)6Isstz=Cibe~ zI4e^Xz36@x^&+r0agkc7P7%i=?6CW6 z^6kDo_;N0D>eD~?=Iqm8|42@OXQ%vhZ0k=&|5QX{GN#`DP**GDWUH+<{H*<7hik=IA(U^v%r=zEd>l-*_87&hH8D1Iccg<^SJs^E*q=cgsrZ^G>3! 
zLgP1MLsY8P@C4m~Y04eTgnPPa=Ol|h5c_{o_O(d4D^Vbs!E(d>$xk1?2hV9UF^0=d zf;&dPbs3o@$H%2hv;#D0gB`LAz5a1azWm{{B)I*LHbS_z97Kl6n}%HLp<_ zv?DD*ayD@UDL6-Nh;;SQi$>Q*qyprfOREY`Q%;-#y)RMu0d3)SfpXJJrsR2wbFN_V^8>XiP zRwak;ftwg1`Uq2mM2-&P&5n%u|7A&U1*rQpnjH0{0O%-a6_UbP$?3o$zUBR1Whv+LWKTX>=C|_S1;AfEdwCm$wV6( znc$ic>gna}xvP`#u} literal 0 HcmV?d00001 diff --git a/retriever/database.sql b/retriever/database.sql new file mode 100644 index 00000000..2513dd28 --- /dev/null +++ b/retriever/database.sql @@ -0,0 +1,35 @@ +CREATE TABLE IF NOT EXISTS `retriever_rule` ( + `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `uid` int(11) NOT NULL, + `contact-id` int(11) NOT NULL, + `data` mediumtext NOT NULL, + PRIMARY KEY (`id`), + KEY `uid` (`uid`), + KEY `contact-id` (`contact-id`) +) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; + +CREATE TABLE IF NOT EXISTS `retriever_item` ( + `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `item-uri` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL, + `item-uid` int(10) unsigned NOT NULL DEFAULT '0', + `contact-id` int(10) unsigned NOT NULL DEFAULT '0', + `resource` int(11) NOT NULL, + `parent` int(11) NOT NULL, + `finished` tinyint(1) unsigned NOT NULL DEFAULT '0', + KEY `resource` (`resource`), + KEY `all` (`item-uri`, `item-uid`, `contact-id`), + PRIMARY KEY (`id`) +) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin; + +CREATE TABLE IF NOT EXISTS `retriever_resource` ( + `id` int(11) unsigned NOT NULL AUTO_INCREMENT, + `type` char(255) NOT NULL, + `binary` int(1) NOT NULL DEFAULT 0, + `url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL, + `created` timestamp NOT NULL DEFAULT now(), + `completed` timestamp NULL DEFAULT NULL, + `last-try` timestamp NULL DEFAULT NULL, + `num-tries` int(11) NOT NULL DEFAULT 0, + `data` mediumtext NOT NULL, + PRIMARY KEY (`id`) +) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin diff --git a/retriever/retriever.php b/retriever/retriever.php new file mode 100644 index 00000000..f793ba5b --- 
/dev/null +++ b/retriever/retriever.php @@ -0,0 +1,738 @@ + + */ + +function retriever_install() { + register_hook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings'); + register_hook('plugin_settings_post', 'addon/retriever/retriever.php', 'retriever_plugin_settings_post'); + register_hook('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook'); + register_hook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); + register_hook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); + + $schema = file_get_contents(dirname(__file__).'/database.sql'); + $arr = explode(';', $schema); + foreach ($arr as $a) { + $r = q($a); + } + + $r = q("SELECT `id` FROM `pconfig` WHERE `cat` LIKE 'retriever_%%'"); + if (count($r) || (get_config('retriever', 'dbversion') == '0.1')) { + $retrievers = array(); + $r = q("SELECT SUBSTRING(`cat`, 10) AS `contact`, `k`, `v` FROM `pconfig` WHERE `cat` LIKE 'retriever%%'"); + foreach ($r as $rr) { + $retrievers[$rr['contact']][$rr['k']] = $rr['v']; + } + foreach ($retrievers as $k => $v) { + $rr = q("SELECT `uid` FROM `contact` WHERE `id` = %d", intval($k)); + $uid = $rr[0]['uid']; + $v['images'] = 'on'; + q("INSERT INTO `retriever_rule` (`uid`, `contact-id`, `data`) VALUES (%d, %d, '%s')", + intval($uid), intval($k), dbesc(json_encode($v))); + } + q("DELETE FROM `pconfig` WHERE `cat` LIKE 'retriever%%'"); + } + if (get_config('retriever', 'dbversion') == '0.2') { + q("ALTER TABLE `retriever_resource` DROP COLUMN `retriever`"); + } + if (get_config('retriever', 'dbversion') == '0.3') { + q("ALTER TABLE `retriever_item` MODIFY COLUMN `item-uri` varchar(800) CHARACTER SET ascii NOT NULL"); + q("ALTER TABLE `retriever_resource` MODIFY COLUMN `url` varchar(800) CHARACTER SET ascii NOT NULL"); + } + if (get_config('retriever', 'dbversion') == '0.4') { + q("ALTER TABLE `retriever_item` ADD COLUMN `finished` tinyint(1) unsigned NOT NULL DEFAULT '0'"); + } + 
if (get_config('retriever', 'dbversion') == '0.5') { + q('ALTER TABLE `retriever_resource` CHANGE `created` `created` timestamp NOT NULL DEFAULT now()'); + q('ALTER TABLE `retriever_resource` CHANGE `completed` `completed` timestamp NULL DEFAULT NULL'); + q('ALTER TABLE `retriever_resource` CHANGE `last-try` `last-try` timestamp NULL DEFAULT NULL'); + q('ALTER TABLE `retriever_item` DROP KEY `all`'); + q('ALTER TABLE `retriever_item` ADD KEY `all` (`item-uri`, `item-uid`, `contact-id`)'); + } + if (get_config('retriever', 'dbversion') == '0.6') { + q('ALTER TABLE `retriever_item` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin'); + q('ALTER TABLE `retriever_item` CHANGE `item-uri` `item-uri` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL'); + q('ALTER TABLE `retriever_resource` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin'); + q('ALTER TABLE `retriever_resource` CHANGE `url` `url` varchar(800) CHARACTER SET ascii COLLATE ascii_bin NOT NULL'); + q('ALTER TABLE `retriever_rule` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin'); + } + if (get_config('retriever', 'dbversion') == '0.7') { + $r = q("SELECT `id`, `data` FROM `retriever_rule`"); + foreach ($r as $rr) { + logger('retriever_install: retriever ' . $rr['id'] . ' old config ' . 
$rr['data'], LOGGER_DATA); + $data = json_decode($rr['data'], true); + if ($data['pattern']) { + $matches = array(); + if (preg_match("/\/(.*)\//", $data['pattern'], $matches)) { + $data['pattern'] = $matches[1]; + } + } + if ($data['match']) { + $include = array(); + foreach (explode('|', $data['match']) as $component) { + $matches = array(); + if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[@([A-Za-z][a-z0-9]*)='([^']*)'\]/", $component, $matches)) { + $include[] = array( + 'element' => $matches[1], + 'attribute' => $matches[2], + 'value' => $matches[3]); + } + if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[contains(concat(' ',normalize-space(@class),' '),' ([^ ']+) ')]/", $component, $matches)) { + $include[] = array( + 'element' => $matches[1], + 'attribute' => $matches[2], + 'value' => $matches[3]); + } + } + $data['include'] = $include; + unset($data['match']); + } + if ($data['remove']) { + $exclude = array(); + foreach (explode('|', $data['remove']) as $component) { + $matches = array(); + if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[@([A-Za-z][a-z0-9]*)='([^']*)'\]/", $component, $matches)) { + $exclude[] = array( + 'element' => $matches[1], + 'attribute' => $matches[2], + 'value' => $matches[3]); + } + if (preg_match("/([A-Za-z][A-Za-z0-9]*)\[contains(concat(' ',normalize-space(@class),' '),' ([^ ']+) ')]/", $component, $matches)) { + $exclude[] = array( + 'element' => $matches[1], + 'attribute' => $matches[2], + 'value' => $matches[3]); + } + } + $data['exclude'] = $exclude; + unset($data['remove']); + } + $r = q('UPDATE `retriever_rule` SET `data` = "%s" WHERE `id` = %d', dbesc(json_encode($data)), $rr['id']); + logger('retriever_install: retriever ' . $rr['id'] . ' new config ' . 
json_encode($data), LOGGER_DATA); + } + } + set_config('retriever', 'dbversion', '0.8'); +} + +function retriever_uninstall() { + unregister_hook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings'); + unregister_hook('plugin_settings_post', 'addon/retriever/retriever.php', 'retriever_plugin_settings_post'); + unregister_hook('post_remote', 'addon/retriever/retriever.php', 'retriever_post_remote_hook'); + unregister_hook('plugin_settings', 'addon/retriever/retriever.php', 'retriever_plugin_settings'); + unregister_hook('plugin_settings_post', 'addon/retriever/retriever.php', 'retriever_plugin_settings_post'); + unregister_hook('contact_photo_menu', 'addon/retriever/retriever.php', 'retriever_contact_photo_menu'); + unregister_hook('cron', 'addon/retriever/retriever.php', 'retriever_cron'); +} + +function retriever_module() {} + +function retriever_cron($a, $b) { + // 100 is a nice sane number. Maybe this should be configurable. + retriever_retrieve_items(100); + retriever_tidy(); +} + +$retriever_item_count = 0; + +function retriever_retrieve_items($max_items) { + global $retriever_item_count; + + $retriever_schedule = array(array(1,'minute'), + array(10,'minute'), + array(1,'hour'), + array(1,'day'), + array(2,'day'), + array(1,'week'), + array(1,'month')); + + $schedule_clauses = array(); + for ($i = 0; $i < count($retriever_schedule); $i++) { + $num = $retriever_schedule[$i][0]; + $unit = $retriever_schedule[$i][1]; + array_push($schedule_clauses, + '(`num-tries` = ' . $i . ' AND TIMESTAMPADD(' . dbesc($unit) . + ', ' . intval($num) . 
', `last-try`) < now())'); + } + + $retrieve_items = $max_items - $retriever_item_count; + do { + $r = q("SELECT * FROM `retriever_resource` WHERE `completed` IS NULL AND (`last-try` IS NULL OR %s) ORDER BY `last-try` ASC LIMIT %d", + dbesc(implode($schedule_clauses, ' OR ')), + intval($retrieve_items)); + if (count($r) == 0) { + break; + } + foreach ($r as $rr) { + retrieve_resource($rr); + $retriever_item_count++; + } + $retrieve_items = $max_items - $retriever_item_count; + } + while ($retrieve_items > 0); + + /* Look for items that are waiting even though the resource has + * completed. This usually happens because we've been asked to + * retrospectively apply a config change. It could also happen + * due to a cron job dying or something. */ + $r = q("SELECT retriever_resource.`id` as resource, retriever_item.`id` as item FROM retriever_resource, retriever_item, retriever_rule WHERE retriever_item.`finished` = 0 AND retriever_item.`resource` = retriever_resource.`id` AND retriever_resource.`completed` IS NOT NULL AND retriever_item.`contact-id` = retriever_rule.`contact-id` AND retriever_item.`item-uid` = retriever_rule.`uid` LIMIT %d", + intval($retrieve_items)); + if (!$r) { + $r = array(); + } + foreach ($r as $rr) { + $resource = q("SELECT * FROM retriever_resource WHERE `id` = %d", $rr['resource']); + $retriever_item = retriever_get_retriever_item($rr['item']); + if (!$retriever_item) { + logger('retriever_retrieve_items: no retriever item with id ' . $rr['item']); + continue; + } + $item = retriever_get_item($retriever_item); + if (!$item) { + logger('retriever_retrieve_items: no item ' . $retriever_item['item-uri']); + continue; + } + $retriever = get_retriever($item['contact-id'], $item['uid']); + if (!$retriever) { + logger('retriever_retrieve_items: no retriever for item ' . + $retriever_item['item-uri'] . ' ' . $retriever_item['uid'] . ' ' . 
$item['contact-id']); + continue; + } + retriever_apply_completed_resource_to_item($retriever, $item, $resource[0]); + q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", + intval($retriever_item['id'])); + retriever_check_item_completed($item); + } +} + +function retriever_tidy() { + q("DELETE FROM retriever_resource WHERE completed IS NOT NULL AND completed < DATE_SUB(now(), INTERVAL 1 WEEK)"); + q("DELETE FROM retriever_resource WHERE completed IS NULL AND created < DATE_SUB(now(), INTERVAL 3 MONTH)"); + + $r = q("SELECT retriever_item.id FROM retriever_item LEFT OUTER JOIN retriever_resource ON (retriever_item.resource = retriever_resource.id) WHERE retriever_resource.id is null"); + foreach ($r as $rr) { + q('DELETE FROM retriever_item WHERE id = %d', intval($rr['id'])); + } +} + +function retrieve_resource($resource) { + logger('retrieve_resource: ' . ($resource['num-tries'] + 1) . + ' attempt at resource ' . $resource['id'] . ' ' . $resource['url'], LOGGER_DEBUG); + q("UPDATE `retriever_resource` SET `last-try` = now(), `num-tries` = `num-tries` + 1 WHERE id = %d", + intval($resource['id'])); + $data = fetch_url($resource['url'], $resource['binary'], $resource['type']); + $resource['type'] = get_app()->get_curl_content_type(); + if ($data) { + $resource['data'] = $data; + q("UPDATE `retriever_resource` SET `completed` = now(), `data` = '%s', `type` = '%s' WHERE id = %d", + dbesc($data), dbesc($resource['type']), intval($resource['id'])); + retriever_resource_completed($resource); + } +} + +function get_retriever($contact_id, $uid, $create = false) { + $r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d AND `uid` = %d", + intval($contact_id), intval($uid)); + if (count($r)) { + $r[0]['data'] = json_decode($r[0]['data'], true); + return $r[0]; + } + if ($create) { + q("INSERT INTO `retriever_rule` (`uid`, `contact-id`) VALUES (%d, %d)", + intval($uid), intval($contact_id)); + $r = q("SELECT * FROM `retriever_rule` WHERE `contact-id` = %d 
AND `uid` = %d", + intval($contact_id), intval($uid)); + return $r[0]; + } +} + +function retriever_get_retriever_item($id) { + $retriever_items = q("SELECT * FROM `retriever_item` WHERE id = %d", intval($id)); + if (count($retriever_items) != 1) { + logger('retriever_get_retriever_item: unable to find retriever_item ' . $id, LOGGER_NORMAL); + return; + } + return $retriever_items[0]; +} + +function retriever_get_item($retriever_item) { + $items = q("SELECT * FROM `item` WHERE `uri` = '%s' AND `uid` = %d AND `contact-id` = %d", + dbesc($retriever_item['item-uri']), + intval($retriever_item['item-uid']), + intval($retriever_item['contact-id'])); + if (count($items) != 1) { + logger('retriever_get_item: unexpected number of results ' . + count($items) . " when searching for item $uri $uid $cid", LOGGER_NORMAL); + return; + } + return $items[0]; +} + +function retriever_item_completed($retriever_item_id, $resource) { + logger('retriever_item_completed: id ' . $retriever_item_id . ' url ' . $resource['url'], LOGGER_DEBUG); + + $retriever_item = retriever_get_retriever_item($retriever_item_id); + if (!$retriever_item) { + return; + } + $retriever = get_retriever($retriever_item['contact-id'], $retriever_item['item-uid']); + if (!$retriever) { + return; + } + $item = retriever_get_item($retriever_item); + if (!$item) { + return; + } + + retriever_apply_completed_resource_to_item($retriever, $item, $resource); + + q("UPDATE `retriever_item` SET `finished` = 1 WHERE id = %d", + intval($retriever_item['id'])); + retriever_check_item_completed($item); +} + +function retriever_resource_completed($resource) { + logger('retriever_resource_completed: id ' . $resource['id'] . ' url ' . 
$resource['url'], LOGGER_DEBUG); + $r = q("SELECT `id` FROM `retriever_item` WHERE `resource` = %d", $resource['id']); + foreach ($r as $rr) { + retriever_item_completed($rr['id'], $resource); + } +} + +function apply_retrospective($retriever, $num) { + $r = q("SELECT * FROM `item` WHERE `contact-id` = %d ORDER BY `received` DESC LIMIT %d", + intval($retriever['contact-id']), intval($num)); + foreach ($r as $item) { + q('UPDATE `item` SET `visible` = 0 WHERE `id` = %d', $item['id']); + retriever_on_item_insert($retriever, $item); + } +} + +function retriever_on_item_insert($retriever, &$item) { + if (!$retriever || !$retriever['id']) { + logger('retriever_on_item_insert: No retriever supplied', LOGGER_NORMAL); + return; + } + if (!$retriever["data"]['enable'] == "on") { + return; + } + if ($retriever["data"]['pattern']) { + $url = preg_replace('/' . $retriever["data"]['pattern'] . '/', $retriever["data"]['replace'], $item['plink']); + logger('retriever_on_item_insert: Changed ' . $item['plink'] . ' to ' . $url, LOGGER_DATA); + } + else { + $url = $item['plink']; + } + + $resource = add_retriever_resource($url); + $retriever_item_id = add_retriever_item($item, $resource); +} + +function add_retriever_resource($url, $binary = false) { + logger('add_retriever_resource: ' . $url, LOGGER_DEBUG); + $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", dbesc($url)); + $resource = $r[0]; + if (count($r)) { + logger('add_retriever_resource: Resource ' . $url . ' already requested', LOGGER_DEBUG); + return $r[0]; + } + else { + q("INSERT INTO `retriever_resource` (`binary`, `url`) " . + "VALUES (%d, '%s')", intval($binary ? 1 : 0), dbesc($url)); + $r = q("SELECT * FROM `retriever_resource` WHERE `url` = '%s'", dbesc($url)); + return $r[0]; + } +} + +function add_retriever_item(&$item, $resource) { + logger('add_retriever_item: ' . $resource['url'] . ' for ' . $item['uri'] . ' ' . $item['uid'] . ' ' . 
$item['contact-id'], LOGGER_DEBUG); + + q("INSERT INTO `retriever_item` (`item-uri`, `item-uid`, `contact-id`, `resource`) " . + "VALUES ('%s', %d, %d, %d)", + dbesc($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource["id"])); + $r = q("SELECT id FROM `retriever_item` WHERE " . + "`item-uri` = '%s' AND `item-uid` = %d AND `contact-id` = %d AND `resource` = %d ORDER BY id DESC", + dbesc($item['uri']), intval($item['uid']), intval($item['contact-id']), intval($resource['id'])); + if (!count($r)) { + logger("add_retriever_item: couldn't create retriever item for " . + $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], + LOGGER_NORMAL); + return; + } + logger('add_retriever_item: created retriever_item ' . $r[0]['id'] . ' for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG); + return $r[0]['id']; +} + +function retriever_get_encoding($resource) { + $matches = array(); + if (preg_match('/charset=(.*)/', $resource['type'], $matches)) { + return trim(array_pop($matches)); + } + return 'utf-8'; +} + +function retriever_construct_xpath($spec) { + if (gettype($spec) != "array") { + return; + } + $components = array(); + foreach ($spec as $clause) { + if (!$clause['attribute']) { + $components[] = $clause['element']; + continue; + } + if ($clause['attribute'] === 'class') { + $components[] = + $clause['element'] . + "[contains(concat(' ', normalize-space(@class), ' '), ' " . + $clause['value'] . " ')]"; + } + else { + $components[] = + $clause['element'] . '[@' . + $clause['attribute'] . "='" . + $clause['value'] . "']"; + } + } + // It would be better to do this in smarty3 in extract.tpl + return implode('|', $components); +} + +function retriever_apply_dom_filter($retriever, &$item, $resource) { + logger('retriever_apply_dom_filter: applying XSLT to ' . $item['id'] . ' ' . 
$item['plink'], LOGGER_DEBUG); + require_once('include/html2bbcode.php'); + + if (!$retriever['data']['include']) { + return; + } + if (!$resource['data']) { + logger('retriever_apply_dom_filter: no text to work with', LOGGER_NORMAL); + return; + } + + $encoding = retriever_get_encoding($resource); + logger('@@@ item type ' . $resource['type'] . ' encoding ' . $encoding); + $extracter_template = get_markup_template('extract.tpl', 'addon/retriever/'); + $doc = new DOMDocument('1.0', 'utf-8'); + if (strpos($resource['type'], 'html') !== false) { + @$doc->loadHTML($resource['data']); + } + else { + $doc->loadXML($resource['data']); + } + logger('@@@ actual encoding of document is ' . $doc->encoding); + + $components = parse_url($item['plink']); + $rooturl = $components['scheme'] . "://" . $components['host']; + $dirurl = $rooturl . dirname($components['path']) . "/"; + + $params = array('$include' => retriever_construct_xpath($retriever['data']['include']), + '$exclude' => retriever_construct_xpath($retriever['data']['exclude']), + '$pageurl' => $item['plink'], + '$dirurl' => $dirurl, + '$rooturl' => $rooturl); + $xslt = replace_macros($extracter_template, $params); + $xmldoc = new DOMDocument(); + $xmldoc->loadXML($xslt); + $xp = new XsltProcessor(); + $xp->importStylesheet($xmldoc); + $transformed = $xp->transformToXML($doc); + $item['body'] = html2bbcode($transformed); + if (!strlen($item['body'])) { + logger('retriever_apply_dom_filter retriever ' . $retriever['id'] . ' item ' . $item['id'] . ': output was empty', LOGGER_NORMAL); + return; + } + $item['body'] .= "\n\n" . t('Retrieved') . ' ' . date("Y-m-d") . ': [url='; + $item['body'] .= $item['plink']; + $item['body'] .= ']' . $item['plink'] . 
'[/url]'; + q("UPDATE `item` SET `body` = '%s', `edited` = '%s' WHERE `id` = %d", + dbesc($item['body']), dbesc(datetime_convert('UTC', 'UTC')), intval($item['id'])); +} + +function retrieve_images(&$item) { + $matches1 = array(); + preg_match_all("/\[img\=([0-9]*)x([0-9]*)\](.*?)\[\/img\]/ism", $item["body"], $matches1); + $matches2 = array(); + preg_match_all("/\[img\](.*?)\[\/img\]/ism", $item["body"], $matches2); + $matches = array_merge($matches1[3], $matches2[1]); + logger('retrieve_images: found ' . count($matches) . ' images for item ' . $item['uri'] . ' ' . $item['uid'] . ' ' . $item['contact-id'], LOGGER_DEBUG); + foreach ($matches as $url) { + if (strpos($url, get_app()->get_baseurl()) === FALSE) { + $resource = add_retriever_resource($url, true); + if (!$resource['completed']) { + add_retriever_item($item, $resource); + } + else { + retriever_transform_images($item, $resource); + } + } + } +} + +function retriever_check_item_completed(&$item) +{ + $r = q('SELECT count(*) FROM retriever_item WHERE `item-uri` = "%s" ' . + 'AND `item-uid` = %d AND `contact-id` = %d AND `finished` = 0', + dbesc($item['uri']), intval($item['uid']), + intval($item['contact-id'])); + $waiting = $r[0]['count(*)']; + logger('retriever_check_item_completed: item ' . $item['uri'] . ' ' . $item['uid'] + . ' '. $item['contact-id'] . ' waiting for ' . $waiting . ' resources', LOGGER_DEBUG); + $old_visible = $item['visible']; + $item['visible'] = $waiting ? 0 : 1; + if (($item['id'] > 0) && ($old_visible != $item['visible'])) { + logger('retriever_check_item_completed: changing visible flag to ' . $item['visible'] . ' and invoking notifier ("edit_post", ' . $item['id'] . 
')', LOGGER_DEBUG); + q("UPDATE `item` SET `visible` = %d, `edited` = '%s' WHERE `id` = %d", + intval($item['visible']), + dbesc(datetime_convert('UTC', 'UTC')), + intval($item['id'])); + proc_run('php', "include/notifier.php", 'edit_post', $item['id']); + } +} + +/** Applies a completed resource to an item. A resource whose type contains image is handed to retriever_transform_images. If a retriever rule is given and the type contains html or xml, retriever_apply_dom_filter is run and, when the rule has the images option set, retrieve_images is run on the item afterwards. */ function retriever_apply_completed_resource_to_item($retriever, &$item, $resource) { + logger('retriever_apply_completed_resource_to_item: retriever ' . + ($retriever ? $retriever['id'] : 'none') . + ' resource ' . $resource['url'] . ' plink ' . $item['plink'], LOGGER_DEBUG); + if (strpos($resource['type'], 'image') !== false) { + retriever_transform_images($item, $resource); + } + if (!$retriever) { + return; + } + if ((strpos($resource['type'], 'html') !== false) || + (strpos($resource['type'], 'xml') !== false)) { + retriever_apply_dom_filter($retriever, $item, $resource); + if ($retriever["data"]['images'] ) { + retrieve_images($item); + } + } +} + +/** Inserts the resource data as a row in the photo table (album Retrieved Images) and returns the new resource hash, or false when the image data cannot be decoded. Width and height are written into $resource: measured with Imagick when that class exists; if no width key is present after that, the GD functions are used instead. NOTE(review): the uid column is filled from the item-uid key, while retriever_transform_images passes the same array and reads uid - confirm which array shape callers provide. */ function retriever_store_photo($item, &$resource) { + $hash = photo_new_resource(); + + if (class_exists('Imagick')) { + try { + $image = new Imagick(); + $image->readImageBlob($resource['data']); + $resource['width'] = $image->getImageWidth(); + $resource['height'] = $image->getImageHeight(); + } + catch (Exception $e) { + logger("ImageMagick couldn't process image " . $resource['id'] . " " . $resource['url'] . ' length ' . strlen($resource['data']) . ': ' . $e->getMessage(), LOGGER_DEBUG); + return false; + } + } + if (!array_key_exists('width', $resource)) { + $image = @imagecreatefromstring($resource['data']); + if ($image === false) { + logger("Couldn't process image " . $resource['id'] . " " . 
$resource['url'], LOGGER_DEBUG); + return false; + } + $resource['width'] = imagesx($image); + $resource['height'] = imagesy($image); + imagedestroy($image); + } + + $url_components = parse_url($resource['url']); + $filename = basename($url_components['path']); + if (!strlen($filename)) { + $filename = 'image'; + } + $r = q("INSERT INTO `photo` + ( `uid`, `contact-id`, `guid`, `resource-id`, `created`, `edited`, `filename`, `type`, `album`, `height`, `width`, `datasize`, `data` ) + VALUES ( %d, %d, '%s', '%s', '%s', '%s', '%s', '%s', '%s', %d, %d, %d, '%s' )", + intval($item['item-uid']), + intval($item['contact-id']), + dbesc(get_guid()), + dbesc($hash), + dbesc(datetime_convert()), + dbesc(datetime_convert()), + dbesc($filename), + dbesc($resource['type']), + dbesc('Retrieved Images'), + intval($resource['height']), + intval($resource['width']), + intval(strlen($resource['data'])), + dbesc($resource['data']) + ); + + return $hash; +} + +/** Stores the resource as a local photo through retriever_store_photo and replaces every occurrence of the resource URL in the item body with the local /photo/ URL built from the returned hash. Returns early when the resource has no data, when storing fails, or when the replacement leaves the body unchanged; otherwise the item row is updated with the new body. NOTE(review): the UPDATE matches on plink, uid and contact-id rather than on id - confirm this is intended. */ function retriever_transform_images(&$item, $resource) { + require_once('include/Photo.php'); + + if (!$resource["data"]) { + logger('retriever_transform_images: no data available for ' + . $resource['id'] . ' ' . $resource['url'], LOGGER_NORMAL); + return; + } + + $hash = retriever_store_photo($item, $resource); + if ($hash === false) { + logger('retriever_transform_images: unable to store photo ' + . $resource['id'] . ' ' . $resource['url'], LOGGER_NORMAL); + return; + } + + $new_url = get_app()->get_baseurl() . '/photo/' . $hash; + logger('retriever_transform_images: replacing ' . $resource['url'] . ' with ' . + $new_url . ' in item ' . 
$item['plink'], LOGGER_DEBUG); + $transformed = str_replace($resource["url"], $new_url, $item['body']); + if ($transformed === $item['body']) { + return; + } + + $item['body'] = $transformed; + q("UPDATE `item` SET `edited` = '%s', `body` = '%s' WHERE `plink` = '%s' AND `uid` = %d AND `contact-id` = %d", + dbesc(datetime_convert('UTC', 'UTC')), + dbesc($item['body']), + dbesc($item['plink']), + intval($item['uid']), + intval($item['contact-id'])); +} + +/** Appends the retriever pages to the page content of the app object $a. Without a logged-in local user only a login prompt is added. When the first path argument is help, help.tpl is rendered with the feed contacts of the user. Any other non-empty first argument is passed to get_retriever as the contact id; when an id field is posted the rule data is rebuilt from the POST fields and saved to retriever_rule, and rule-config.tpl is then rendered with the current rule. */ function retriever_content($a) { + if (!local_user()) { + $a->page['content'] .= "

Please log in

"; + return; + } + if ($a->argv[1] === 'help') { + $feeds = q("SELECT `id`, `name`, `thumb` FROM contact WHERE `uid` = %d AND `network` = 'feed'", + local_user()); + foreach ($feeds as $k=>$v) { + $feeds[$k]['url'] = $a->get_baseurl() . '/retriever/' . $v['id']; + } + $template = get_markup_template('/help.tpl', 'addon/retriever/'); + $a->page['content'] .= replace_macros($template, array( + '$config' => $a->get_baseurl() . '/settings/addon', + '$feeds' => $feeds)); + return; + } + if ($a->argv[1]) { + $retriever = get_retriever($a->argv[1], local_user(), false); + + if (x($_POST["id"])) { + $retriever = get_retriever($a->argv[1], local_user(), true); + $retriever["data"] = array(); + foreach (array('pattern', 'replace', 'enable', 'images') as $setting) { + if (x($_POST['retriever_' . $setting])) { + $retriever["data"][$setting] = $_POST['retriever_' . $setting]; + } + } + foreach ($_POST as $k=>$v) { + if (preg_match("/retriever-(include|exclude)-(\d+)-(element|attribute|value)/", $k, $matches)) { + $retriever['data'][$matches[1]][intval($matches[2])][$matches[3]] = $v; + } + } + // You've gotta have an element, even if it's just "*" + foreach ($retriever['data']['include'] as $k=>$clause) { + if (!$clause['element']) { + unset($retriever['data']['include'][$k]); + } + } + foreach ($retriever['data']['exclude'] as $k=>$clause) { + if (!$clause['element']) { + unset($retriever['data']['exclude'][$k]); + } + } + q("UPDATE `retriever_rule` SET `data`='%s' WHERE `id` = %d", + dbesc(json_encode($retriever["data"])), intval($retriever["id"])); + $a->page['content'] .= "

Settings Updated"; + if (x($_POST["retriever_retrospective"])) { + apply_retrospective($retriever, $_POST["retriever_retrospective"]); + /* NOTE(review): the count shown here is read from the apply POST field, while the value given to apply_retrospective comes from retriever_retrospective - this looks like the wrong key; the value is also echoed into the page without escaping. */ $a->page['content'] .= " and retrospectively applied to " . $_POST["apply"] . " posts"; + } + $a->page['content'] .= ".

"; + } + + $template = get_markup_template('/rule-config.tpl', 'addon/retriever/'); + $a->page['content'] .= replace_macros($template, array( + '$enable' => array( + 'retriever_enable', + t('Enabled'), + $retriever['data']['enable']), + '$pattern' => array( + 'retriever_pattern', + t('URL Pattern'), + $retriever["data"]['pattern'], + t('Regular expression matching part of the URL to replace')), + '$replace' => array( + 'retriever_replace', + t('URL Replace'), + $retriever["data"]['replace'], + t('Text to replace matching part of above regular expression')), + '$images' => array( + 'retriever_images', + t('Download Images'), + $retriever['data']['images']), + '$retrospective' => array( + 'retriever_retrospective', + t('Retrospectively Apply'), + '0', + t('Reapply the rules to this number of posts')), + '$title' => t('Retrieve Feed Content'), + '$help' => $a->get_baseurl() . '/retriever/help', + '$submit' => t('Submit'), + '$id' => ($retriever["id"] ? $retriever["id"] : "create"), + '$tag_t' => t('Tag'), + '$attribute_t' => t('Attribute'), + '$value_t' => t('Value'), + '$add_t' => t('Add'), + '$remove_t' => t('Remove'), + '$include_t' => t('Include'), + '$include' => $retriever['data']['include'], + '$exclude_t' => t('Exclude'), + '$exclude' => $retriever["data"]['exclude'])); + return; + } +} + +/** Adds a retriever entry to the menu array in $args, labelled Retriever and linking to the retriever page of the contact, when the contact network is feed. Does nothing when $args is empty. */ function retriever_contact_photo_menu($a, &$args) { + if (!$args) { + return; + } + if ($args["contact"]["network"] == "feed") { + $args["menu"][ 'retriever' ] = array(t('Retriever'), $a->get_baseurl() . '/retriever/' . $args["contact"]['id']); + } +} + +/** Presumably the handler for the post_remote hook (registration is not visible here - confirm). Looks up the retriever rule for the contact and uid of the item: if one exists it is applied through retriever_on_item_insert, otherwise retrieve_images is run when the all_photos setting of that uid is set. Finally retriever_check_item_completed re-evaluates the visible flag. NOTE(review): retrieve_images is called with a second argument that it does not declare. */ function retriever_post_remote_hook(&$a, &$item) { + logger('retriever_post_remote_hook: ' . $item['uri'] . ' ' . $item['uid'] . ' ' . 
$item['contact-id'], LOGGER_DEBUG); + + $retriever = get_retriever($item['contact-id'], $item["uid"], false); + if ($retriever) { + retriever_on_item_insert($retriever, $item); + } + else { + if (get_pconfig($item["uid"], 'retriever', 'all_photos')) { + retrieve_images($item, null); + } + } + retriever_check_item_completed($item); +} + +/** Appends the retriever section to the settings markup in $s by rendering settings.tpl. The all_photos checkbox is marked checked when the stored per-user all_photos value equals on. */ function retriever_plugin_settings(&$a,&$s) { + $all_photos = get_pconfig(local_user(), 'retriever', 'all_photos'); + $all_photos_mu = ($all_photos == 'on') ? ' checked="true"' : ''; + $template = get_markup_template('/settings.tpl', 'addon/retriever/'); + $s .= replace_macros($template, array( + '$submit' => t('Submit'), + '$title' => t('Retriever Settings'), + '$help' => $a->get_baseurl() . '/retriever/help', + '$all_photos' => $all_photos_mu, + '$all_photos_t' => t('All Photos'))); +} + +/** Stores the posted all_photos value as a per-user retriever setting, or deletes that setting when the posted value is empty or falsy. The $post parameter is not used; the value is read from $_POST directly. */ function retriever_plugin_settings_post($a,$post) { + if ($_POST['all_photos']) { + set_pconfig(local_user(), 'retriever', 'all_photos', $_POST['all_photos']); + } + else { + del_pconfig(local_user(), 'retriever', 'all_photos'); + } +} diff --git a/retriever/view/extract.tpl b/retriever/view/extract.tpl new file mode 100644 index 00000000..e851ddb5 --- /dev/null +++ b/retriever/view/extract.tpl @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + +{{ if $exclude }} + +{{ endif }} + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/retriever/view/help.tpl b/retriever/view/help.tpl new file mode 100644 index 00000000..37789a52 --- /dev/null +++ b/retriever/view/help.tpl @@ -0,0 +1,148 @@ +

Retriever Plugin Help

+

+This plugin replaces the short excerpts you normally get in RSS feeds +with the full content of the article from the source website. You +specify which part of the page you're interested in with a set of +rules. When each item arrives, the plugin downloads the full page +from the website, extracts content using the rules, and replaces the +original article. +

+

+There are a few reasons you may want to do this. The source website +might be slow or overloaded. The source website might be +untrustworthy, in which case using Friendica to scrub the HTML is a +good idea. You might be on a LAN that blacklists certain websites. +It also works neatly with the mailstream plugin, allowing you to read +a news stream comfortably without needing continuous Internet +connectivity. +

+

+However, setting up retriever can be quite tricky since it depends on +the internal design of the website. This was designed to make life +easy for the website's developers, not for you. You'll need to have +some familiarity with HTML, and be willing to adapt when the website +suddenly changes everything without notice. +

+

Configuring Retriever for a feed

+

+To set up retriever for an RSS feed, go to the "Contacts" page and +find your feed. Then click on the drop-down menu on the contact. +Select "Retriever" to get to the retriever configuration. +

+

+The "Include" configuration section specifies parts of the page to +include in the article. Each row has three components: +

+
    +
  • An HTML tag (e.g. "div", "span", "p")
  • +
  • An attribute (usually "class" or "id")
  • +
  • A value for the attribute
  • +
+

+A simple case is when the article is wrapped in a "div" element: +

+
+    ...
+    <div class="main-content">
+      <h2>Man Bites Dog</h2>
+      <img src="mbd.jpg">
+      <p>
+        Residents of the sleepy community of Nowheresville were
+        shocked yesterday by the sight of creepy local weirdo Jim
+        McOddman assaulting innocent local dog Snufflekins with his
+        false teeth.
+      </p>
+      ...
+    </div>
+    ...
+
+

+You then specify the tag "div", attribute "class", and value +"main-content". Everything else in the page, such as navigation +panels and menus and footers and so on, will be discarded. If there +is more than one section of the page you want to include, specify each +one on a separate row. If the matching section contains some sections +you want to remove, specify those in the "Exclude" section in the same +way. +

+

+Once you've got a configuration that you think will work, you can try +it out on some existing articles. Type a number into the +"Retrospectively Apply" box and click "Submit". After a while +(exactly how long depends on your system's cron configuration) the new +articles should be available. +

+

Techniques

+

+You can leave the attribute and value blank to include all the +corresponding elements with the specified tag name. You can also use +a tag name of "*", which will match any element type with the +specified attribute regardless of the tag. +

+

+Note that the "class" attribute is a special case. Many web page +templates will put multiple different classes in the same element, +separated by spaces. If you specify an attribute of "class" it will +match an element if any of its classes matches the specified value. +For example: +

+
+    <div class="article breaking-news">
+
+

+In this case you can specify a value of "article", or "breaking-news". +You can also specify "article breaking-news", but that won't match if +the website suddenly changes to "breaking-news article", so that's not +recommended. +

+

+One useful trick you can try is using the website's "print" pages. +Many news sites have print versions of all their articles. These are +usually drastically simplified compared to the live website page. +Sometimes this is a good way to get the whole article when it's +normally split across multiple pages. +

+

+Hopefully the URL for the print page is a predictable variant of the +normal article URL. For example, an article URL like: +

+
+    http://www.newssite.com/article-8636.html
+
+

+...might have a print version at: +

+
+    http://www.newssite.com/print/article-8636.html
+
+

+To change the URL used to retrieve the page, use the "URL Pattern" and +"URL Replace" fields. The pattern is a regular expression matching +part of the URL to replace. In this case, you might use a pattern of +"/article" and a replace string of "/print/article". A common pattern +is simply "$", used to add the replace string to the end of the URL. +

+

Background Processing

+

+Note that retrieving and processing the articles can take some time, +so it's done in the background. Incoming articles will be marked as +invisible while they're in the process of being downloaded. If a URL +fails, the plugin will keep trying at progressively longer intervals +for up to a month, in case the website is temporarily overloaded or +the network is down. +

+

Retrieving Images

+

+Retriever can also optionally download images and store them in the +local Friendica instance. Just check the "Download Images" box. You +can also download images in every item from your network, whether it's +an RSS feed or not. Go to the "Settings" page and +click "Plugin settings". Then check the "All +Photos" box in the "Retriever Settings" section and click "Submit". +

+

Configure Feeds:

+
+{{ for $feeds as $feed }} +{{ inc contact_template.tpl with $contact=$feed }}{{ endinc }} +{{ endfor }} +
diff --git a/retriever/view/rule-config.tpl b/retriever/view/rule-config.tpl new file mode 100644 index 00000000..3bd2bfce --- /dev/null +++ b/retriever/view/rule-config.tpl @@ -0,0 +1,111 @@ +
+ +

$title

+

Get Help

+
+ +{{ inc field_checkbox.tpl with $field=$enable }}{{ endinc }} +{{ inc field_input.tpl with $field=$pattern }}{{ endinc }} +{{ inc field_input.tpl with $field=$replace }}{{ endinc }} +{{ inc field_checkbox.tpl with $field=$images }}{{ endinc }} +{{ inc field_input.tpl with $field=$retrospective }}{{ endinc }} +

$include_t:

+
+ + + + + +{{ if $include }} + {{ for $include as $k=>$m }} + + + + + + + {{ endfor }} +{{ else }} + + + + + + +{{ endif }} + +
$tag_t$attribute_t$value_t
+ +
+

$exclude_t:

+
+ + + + + +{{ if $exclude }} + {{ for $exclude as $k=>$r }} + + + + + + + {{ endfor }} +{{ else }} + + + + + + +{{ endif }} + +
$tag_t$attribute_t$value_t
+ +
+ +
+
diff --git a/retriever/view/settings.tpl b/retriever/view/settings.tpl new file mode 100644 index 00000000..87b95287 --- /dev/null +++ b/retriever/view/settings.tpl @@ -0,0 +1,17 @@ +
+

$title

+

+ Get Help +

+ + + + + + + + + + +
$all_photos_t:
+
diff --git a/retriever/view/smarty3/extract.tpl b/retriever/view/smarty3/extract.tpl new file mode 100644 index 00000000..36b91813 --- /dev/null +++ b/retriever/view/smarty3/extract.tpl @@ -0,0 +1,41 @@ +{{* + * AUTOMATICALLY GENERATED TEMPLATE + * DO NOT EDIT THIS FILE, CHANGES WILL BE OVERWRITTEN + * + *}} + + + + + + + + + + + + + + + + + +{{if $exclude}} + +{{/if}} + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/retriever/view/smarty3/help.tpl b/retriever/view/smarty3/help.tpl new file mode 100644 index 00000000..9e481188 --- /dev/null +++ b/retriever/view/smarty3/help.tpl @@ -0,0 +1,153 @@ +{{* + * AUTOMATICALLY GENERATED TEMPLATE + * DO NOT EDIT THIS FILE, CHANGES WILL BE OVERWRITTEN + * + *}} +

Retriever Plugin Help

+

+This plugin replaces the short excerpts you normally get in RSS feeds +with the full content of the article from the source website. You +specify which part of the page you're interested in with a set of +rules. When each item arrives, the plugin downloads the full page +from the website, extracts content using the rules, and replaces the +original article. +

+

+There's a few reasons you may want to do this. The source website +might be slow or overloaded. The source website might be +untrustworthy, in which case using Friendica to scrub the HTML is a +good idea. You might be on a LAN that blacklists certain websites. +It also works neatly with the mailstream plugin, allowing you to read +a news stream comfortably without needing continuous Internet +connectivity. +

+

+However, setting up retriever can be quite tricky since it depends on +the internal design of the website. This was designed to make life +easy for the website's developers, not for you. You'll need to have +some familiarity with HTML, and be willing to adapt when the website +suddenly changes everything without notice. +

+

Configuring Retriever for a feed

+

+To set up retriever for an RSS feed, go to the "Contacts" page and +find your feed. Then click on the drop-down menu on the contact. +Select "Retriever" to get to the retriever configuration. +

+

+The "Include" configuration section specifies parts of the page to +include in the article. Each row has three components: +

+
    +
  • An HTML tag (e.g. "div", "span", "p")
  • +
  • An attribute (usually "class" or "id")
  • +
  • A value for the attribute
  • +
+

+A simple case is when the article is wrapped in a "div" element: +

+
+    ...
+    <div class="main-content">
+      <h2>Man Bites Dog</h2>
+      <img src="mbd.jpg">
+      <p>
+        Residents of the sleepy community of Nowheresville were
+        shocked yesterday by the sight of creepy local weirdo Jim
+        McOddman assaulting innocent local dog Snufflekins with his
+        false teeth.
+      </p>
+      ...
+    </div>
+    ...
+
+

+You then specify the tag "div", attribute "class", and value +"main-content". Everything else in the page, such as navigation +panels and menus and footers and so on, will be discarded. If there +is more than one section of the page you want to include, specify each +one on a separate row. If the matching section contains some sections +you want to remove, specify those in the "Exclude" section in the same +way. +

+

+Once you've got a configuration that you think will work, you can try +it out on some existing articles. Type a number into the +"Retrospectively Apply" box and click "Submit". After a while +(exactly how long depends on your system's cron configuration) the new +articles should be available. +

+

Techniques

+

+You can leave the attribute and value blank to include all the +corresponding elements with the specified tag name. You can also use +a tag name of "*", which will match any element type with the +specified attribute regardless of the tag. +

+

+Note that the "class" attribute is a special case. Many web page +templates will put multiple different classes in the same element, +separated by spaces. If you specify an attribute of "class" it will +match an element if any of its classes matches the specified value. +For example: +

+
+    <div class="article breaking-news">
+
+

+In this case you can specify a value of "article", or "breaking-news". +You can also specify "article breaking-news", but that won't match if +the website suddenly changes to "breaking-news article", so that's not +recommended. +

+

+One useful trick you can try is using the website's "print" pages. +Many news sites have print versions of all their articles. These are +usually drastically simplified compared to the live website page. +Sometimes this is a good way to get the whole article when it's +normally split across multiple pages. +

+

+Hopefully the URL for the print page is a predictable variant of the +normal article URL. For example, an article URL like: +

+
+    http://www.newssite.com/article-8636.html
+
+

+...might have a print version at: +

+
+    http://www.newssite.com/print/article-8636.html
+
+

+To change the URL used to retrieve the page, use the "URL Pattern" and +"URL Replace" fields. The pattern is a regular expression matching +part of the URL to replace. In this case, you might use a pattern of +"/article" and a replace string of "/print/article". A common pattern +is simply "$", used to add the replace string to the end of the URL. +

+

Background Processing

+

+Note that retrieving and processing the articles can take some time, +so it's done in the background. Incoming articles will be marked as +invisible while they're in the process of being downloaded. If a URL +fails, the plugin will keep trying at progressively longer intervals +for up to a month, in case the website is temporarily overloaded or +the network is down. +

+

Retrieving Images

+

+Retriever can also optionally download images and store them in the +local Friendica instance. Just check the "Download Images" box. You +can also download images in every item from your network, whether it's +an RSS feed or not. Go to the "Settings" page and +click "Plugin settings". Then check the "All +Photos" box in the "Retriever Settings" section and click "Submit". +

+

Configure Feeds:

+
+{{foreach $feeds as $feed}} +{{include file="contact_template.tpl" contact=$feed}} +{{/foreach}} +
diff --git a/retriever/view/smarty3/rule-config.tpl b/retriever/view/smarty3/rule-config.tpl new file mode 100644 index 00000000..79add59a --- /dev/null +++ b/retriever/view/smarty3/rule-config.tpl @@ -0,0 +1,116 @@ +{{* + * AUTOMATICALLY GENERATED TEMPLATE + * DO NOT EDIT THIS FILE, CHANGES WILL BE OVERWRITTEN + * + *}} +
+ +

{{$title}}

+

Get Help

+
+ +{{include file="field_checkbox.tpl" field=$enable}} +{{include file="field_input.tpl" field=$pattern}} +{{include file="field_input.tpl" field=$replace}} +{{include file="field_checkbox.tpl" field=$images}} +{{include file="field_input.tpl" field=$retrospective}} +

{{$include_t}}:

+
+ + + + + +{{if $include}} + {{foreach $include as $k=>$m}} + + + + + + + {{/foreach}} +{{else}} + + + + + + +{{/if}} + +
{{$tag_t}}{{$attribute_t}}{{$value_t}}
+ +
+

{{$exclude_t}}:

+
+ + + + + +{{if $exclude}} + {{foreach $exclude as $k=>$r}} + + + + + + + {{/foreach}} +{{else}} + + + + + + +{{/if}} + +
TagAttributeValue
+ +
+ +
+
diff --git a/retriever/view/smarty3/settings.tpl b/retriever/view/smarty3/settings.tpl new file mode 100644 index 00000000..3bc71559 --- /dev/null +++ b/retriever/view/smarty3/settings.tpl @@ -0,0 +1,22 @@ +{{* + * AUTOMATICALLY GENERATED TEMPLATE + * DO NOT EDIT THIS FILE, CHANGES WILL BE OVERWRITTEN + * + *}} +
+

{{$title}}

+

+ Get Help +

+ + + + + + + + + + +
{{$all_photos_t}}:
+