root/trunk/pdns/pdns/pdns_recursor.cc @ 1468

Revision 1468, 69.5 KB (checked in by ahu, 3 years ago)

silence 'sdig' EDNS-PING output, replace Utility::strcasecmp by boost::iequals, remove some more dead code, speed up initial label parsing

  • Property svn:eol-style set to native
  • Property svn:keywords set to author date id revision
Line 
1/*
2    PowerDNS Versatile Database Driven Nameserver
3    Copyright (C) 2003 - 2009  PowerDNS.COM BV
4
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License version 2
7    as published by the Free Software Foundation
8
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17*/
18
19#ifndef WIN32
20# include <netdb.h>
21# include <unistd.h>
22#else
23 #include "ntservice.hh"
24 #include "recursorservice.hh"
25#endif // WIN32
26
27#include <boost/foreach.hpp>
28#include "recpacketcache.hh"
29#include "utility.hh"
30#include "dns_random.hh"
31#include <iostream>
32#include <errno.h>
33#include <map>
34#include <set>
35#include "recursor_cache.hh"
36#include <stdio.h>
37#include <signal.h>
38#include <stdlib.h>
39#include "misc.hh"
40#include "mtasker.hh"
41#include <utility>
42#include "arguments.hh"
43#include "syncres.hh"
44#include <fcntl.h>
45#include <fstream>
46#include "sstuff.hh"
47#include <boost/tuple/tuple.hpp>
48#include <boost/tuple/tuple_comparison.hpp>
49#include <boost/shared_array.hpp>
50#include <boost/lexical_cast.hpp>
51#include <boost/function.hpp>
52#include <boost/algorithm/string.hpp>
53#include <netinet/tcp.h>
54#include "dnsparser.hh"
55#include "dnswriter.hh"
56#include "dnsrecords.hh"
57#include "zoneparser-tng.hh"
58#include "rec_channel.hh"
59#include "logger.hh"
60#include "iputils.hh"
61#include "mplexer.hh"
62#include "config.h"
63#include "lua-pdns-recursor.hh"
64
65#ifndef RECURSOR
66#include "statbag.hh"
67StatBag S;
68#endif
69
// Per-thread file descriptor multiplexer; the socket callbacks in this file are registered on it.
__thread FDMultiplexer* t_fdm;
// Per-thread identifier, printed as a prefix in the question/answer log lines.
__thread unsigned int t_id;
// Maximum number of simultaneous TCP connections accepted per client address; 0 disables the check.
unsigned int g_maxTCPPerClient;
// Timeout, in milliseconds, passed to MT->waitEvent() by the asynchronous send/receive helpers.
unsigned int g_networkTimeoutMsec;
// When true, common client-side errors (unparseable packets, early disconnects) are logged.
bool g_logCommonErrors;
// Per-thread pointer to the (possibly empty) Lua hook object consulted by startDoResolve().
__thread shared_ptr<PowerDNSLua>* t_pdl;
// NOTE(review): not referenced in this part of the file; presumably counts Lua script reloads -- confirm.
unsigned int g_luaReloadCounter;

// Cache of complete UDP response packets, looked up before an incoming query is parsed.
RecursorPacketCache g_packetCache;
79
80#include "namespaces.hh"
81
82#ifdef __FreeBSD__           // see cvstrac ticket #26
83#include <pthread.h>
84#include <semaphore.h>
85#endif
86
87
// The record cache; primeHints() seeds it with the root hints.
MemRecursorCache RC;
// Process-wide statistics counters updated throughout this file.
RecursorStats g_stats;
// When true, the per-question and per-answer log lines are suppressed.
bool g_quiet;
// If non-null, only clients matching this group are served (see the allow-from checks below).
NetmaskGroup* g_allowFrom;
// NOTE(review): not used in this part of the file; presumably addresses we must never query -- confirm.
NetmaskGroup* g_dontQuery;
string s_programname="pdns_recursor";
typedef vector<int> tcpListenSockets_t;
tcpListenSockets_t g_tcpListenSockets;   // shared across threads, but this is fine, never written to from a thread. All threads listen on all sockets
// Read timeout applied to client TCP connections via setReadTTD().
int g_tcpTimeout;
//MemcachedCommunicator* g_mc;
// DHCPCommunicator* g_dc;

// Maps a listening socket descriptor to its local address; passed to the Lua hooks as the destination address.
map<int, ComboAddress> g_listenSocketsAddresses; // is shared across all threads right now
101
102struct DNSComboWriter {
103  DNSComboWriter(const char* data, uint16_t len, const struct timeval& now) : d_mdp(data, len), d_now(now), 
104                                                                                                        d_tcp(false), d_socket(-1)
105  {}
106  MOADNSParser d_mdp;
107  void setRemote(ComboAddress* sa)
108  {
109    d_remote=*sa;
110  }
111
112  void setSocket(int sock)
113  {
114    d_socket=sock;
115  }
116
117  string getRemote() const
118  {
119    return d_remote.toString();
120  }
121
122  struct timeval d_now;
123  ComboAddress d_remote;
124  bool d_tcp;
125  int d_socket;
126};
127
128
129ArgvMap &arg()
130{
131  static ArgvMap theArg;
132  return theArg;
133}
134
135struct timeval g_now;
136typedef vector<int> tcpserversocks_t;
137
138__thread MT_t* MT; // the big MTasker
139
140void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
141
142// -1 is error, 0 is timeout, 1 is success
143int asendtcp(const string& data, Socket* sock) 
144{
145  PacketID pident;
146  pident.sock=sock;
147  pident.outMSG=data;
148 
149  t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
150  string packet;
151
152  int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
153
154  if(!ret || ret==-1) { // timeout
155    t_fdm->removeWriteFD(sock->getHandle());
156  }
157  else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
158    return -1;
159  }
160  return ret;
161}
162
163void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
164
165// -1 is error, 0 is timeout, 1 is success
166int arecvtcp(string& data, int len, Socket* sock) 
167{
168  data.clear();
169  PacketID pident;
170  pident.sock=sock;
171  pident.inNeeded=len;
172  t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
173
174  int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
175  if(!ret || ret==-1) { // timeout
176    t_fdm->removeReadFD(sock->getHandle());
177  }
178  else if(data.empty()) {// error, EOF or other
179    return -1;
180  }
181
182  return ret;
183}
184
185vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6; 
186ComboAddress g_local4("0.0.0.0"), g_local6("::");
187
188ComboAddress getQueryLocalAddress(int family, uint16_t port)
189{
190  ComboAddress ret;
191  if(family==AF_INET) {
192    if(g_localQueryAddresses4.empty()) 
193      ret = g_local4;
194    else 
195      ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
196    ret.sin4.sin_port = htons(port);
197  }
198  else {
199    if(g_localQueryAddresses6.empty())
200      ret = g_local6;
201    else
202      ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
203     
204    ret.sin6.sin6_port = htons(port);
205  }
206  return ret;
207}
208
209void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
210
211// you can ask this class for a UDP socket to send a query from
212// this socket is not yours, don't even think about deleting it
213// but after you call 'returnSocket' on it, don't assume anything anymore
214class UDPClientSocks
215{
216  unsigned int d_numsocks;
217  unsigned int d_maxsocks;
218  pthread_mutex_t d_lock;
219public:
220  UDPClientSocks() : d_numsocks(0), d_maxsocks(5000)
221  {
222    pthread_mutex_init(&d_lock, 0);
223  }
224
225  typedef set<int> socks_t;
226  socks_t d_socks;
227
228  // returning -1 means: temporary OS error (ie, out of files), -2 means OS error
229  int getSocket(const ComboAddress& toaddr, int* fd)
230  {
231    *fd=makeClientSocket(toaddr.sin4.sin_family);
232    if(*fd < 0) // temporary error - receive exception otherwise
233      return -1;
234
235    if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
236      int err = errno;
237      //      returnSocket(*fd);
238      Utility::closesocket(*fd);
239      if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
240        return -2;
241      return -1;
242    }
243
244    Lock l(&d_lock);
245
246    d_socks.insert(*fd);
247    d_numsocks++;
248    return 0;
249  }
250
251  void returnSocket(int fd)
252  {
253    Lock l(&d_lock);
254    socks_t::iterator i=d_socks.find(fd);
255    if(i==d_socks.end()) {
256      throw AhuException("Trying to return a socket (fd="+lexical_cast<string>(fd)+") not in the pool");
257    }
258    returnSocketLocked(i);
259  }
260
261  // return a socket to the pool, or simply erase it
262  void returnSocketLocked(socks_t::iterator& i)
263  {
264    if(i==d_socks.end()) {
265      throw AhuException("Trying to return a socket not in the pool");
266    }
267    try {
268      t_fdm->removeReadFD(*i);
269    }
270    catch(FDMultiplexerException& e) {
271      // we sometimes return a socket that has not yet been assigned to t_fdm
272    }
273    Utility::closesocket(*i);
274   
275    d_socks.erase(i++);
276    --d_numsocks;
277  }
278
279  // returns -1 for errors which might go away, throws for ones that won't
280  static int makeClientSocket(int family)
281  {
282    int ret=(int)socket(family, SOCK_DGRAM, 0);
283    if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
284      return ret;
285   
286    if(ret<0) 
287      throw AhuException("Making a socket for resolver: "+stringerror());
288
289   
290    int tries=10;
291    while(--tries) {
292      uint16_t port;
293     
294      if(tries==1)  // fall back to kernel 'random'
295        port = 0;
296      else
297        port = 1025 + dns_random(64510);
298     
299      ComboAddress sin=getQueryLocalAddress(family, port); // does htons for us
300
301      if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0) 
302        break;
303    }
304    if(!tries)
305      throw AhuException("Resolver binding to local query client socket: "+stringerror());
306   
307    Utility::setNonBlocking(ret);
308    return ret;
309  }
310} g_udpclientsocks;
311
312
313/* these two functions are used by LWRes */
314// -2 is OS error, -1 is error that depends on the remote, > 0 is success
315int asendto(const char *data, int len, int flags, 
316            const ComboAddress& toaddr, uint16_t id, const string& domain, uint16_t qtype, int* fd) 
317{
318
319  PacketID pident;
320  pident.domain = domain;
321  pident.remote = toaddr;
322  pident.type = qtype;
323
324  // see if there is an existing outstanding request we can chain on to, using partial equivalence function
325  pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
326
327  for(; chain.first != chain.second; chain.first++) {
328    if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
329      /*
330      cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
331      cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
332          <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
333      */
334      chain.first->key.chain.insert(id); // we can chain
335      *fd=-1;                            // gets used in waitEvent / sendEvent later on
336      return 1;
337    }
338  }
339
340  int ret=g_udpclientsocks.getSocket(toaddr, fd);
341  if(ret < 0)
342    return ret;
343
344  pident.fd=*fd;
345  pident.id=id;
346 
347  t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
348  ret = send(*fd, data, len, 0);
349
350  int tmp = errno;
351
352  if(ret < 0)
353    g_udpclientsocks.returnSocket(*fd);
354
355  errno = tmp; // this is for logging purposes only
356  return ret;
357}
358
359// -1 is error, 0 is timeout, 1 is success
360int arecvfrom(char *data, int len, int flags, const ComboAddress& fromaddr, int *d_len, 
361              uint16_t id, const string& domain, uint16_t qtype, int fd, struct timeval* now)
362{
363  static optional<unsigned int> nearMissLimit;
364  if(!nearMissLimit) 
365    nearMissLimit=::arg().asNum("spoof-nearmiss-max");
366
367  PacketID pident;
368  pident.fd=fd;
369  pident.id=id;
370  pident.domain=domain;
371  pident.type = qtype;
372  pident.remote=fromaddr;
373
374  string packet;
375  int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
376
377  if(ret > 0) {
378    if(packet.empty()) // means "error"
379      return -1; 
380
381    *d_len=(int)packet.size();
382    memcpy(data,packet.c_str(),min(len,*d_len));
383    if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
384      L<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
385      g_stats.spoofCount++;
386      return -1;
387    }
388  }
389  else {
390    if(fd >= 0)
391      g_udpclientsocks.returnSocket(fd);
392  }
393  return ret;
394}
395
396void setBuffer(int fd, int optname, uint32_t size)
397{
398  uint32_t psize=0;
399  socklen_t len=sizeof(psize);
400 
401  if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
402    L<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
403    return; 
404  }
405
406  if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
407    L<<Logger::Error<<"Warning: unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
408}
409
410
411static void setReceiveBuffer(int fd, uint32_t size)
412{
413  setBuffer(fd, SO_RCVBUF, size);
414}
415
416static void setSendBuffer(int fd, uint32_t size)
417{
418  setBuffer(fd, SO_SNDBUF, size);
419}
420
421string s_pidfname;
422static void writePid(void)
423{
424  ofstream of(s_pidfname.c_str(), ios_base::app);
425  if(of)
426    of<< Utility::getpid() <<endl;
427  else
428    L<<Logger::Error<<"Requested to write pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
429}
430
431void primeHints(void)
432{
433  // prime root cache
434  set<DNSResourceRecord>nsset;
435#if 0
436  {
437    time_t now = time(0);
438
439    string templ;
440    DNSResourceRecord arr;
441   
442    arr.qtype=QType::AAAA;
443    arr.ttl=now+3600;
444    arr.content="::1";
445   
446    DTime dt;
447    dt.set();
448    for(int n = 0 ; n < 500000; ++n) {
449      set<DNSResourceRecord> aset;
450      arr.qname=templ="blah"+lexical_cast<string>(n)+".testdomain.com";
451      aset.insert(arr);
452      RC.replace(now, templ, QType(QType::AAAA), aset, true); // auth, nuke it all
453    }
454    cerr<<"fill1 secs: "<<dt.udiff()/1000000.0<<endl;
455
456    arr.content="::2";
457    dt.set();
458    for(int n = 0 ; n < 500000; ++n) {
459      set<DNSResourceRecord> aset;
460      arr.qname=templ="blah"+lexical_cast<string>(n)+".testdomain.com";
461      aset.insert(arr);
462      RC.replace(now, templ, QType(QType::AAAA), aset, true); // auth, nuke it all
463    }
464    cerr<<"refill secs: "<<dt.udiff()/1000000.0<<endl;
465
466
467    dt.set();
468    for(int n = 0 ; n < 500000; ++n) {
469      set<DNSResourceRecord> aset;
470      templ="blah"+lexical_cast<string>(n)+".testdomain.com";
471      RC.get(now, templ, QType(QType::AAAA), &aset); // auth, nuke it all
472    }
473    cerr<<"get secs: "<<dt.udiff()/1000000.0<<endl;
474    vector<string> names;
475    for(int n = 0 ; n < 500000; ++n) {
476      templ="blah"+lexical_cast<string>(n)+".testdomain.com";
477      names.push_back(templ);
478    }
479    random_shuffle(names.begin(), names.end());
480    cerr<<"go!"<<endl;
481    dt.set();
482    for(int n = 0 ; n < 500000; ++n) {
483      vector<DNSResourceRecord> avect;
484      RC.get2(now, names[n], QType(QType::AAAA), &avect); // auth, nuke it all
485    }
486    cerr<<"get2 secs: "<<dt.udiff()/1000000.0<<endl;
487
488    //    exit(1);
489  }
490#endif
491
492  if(::arg()["hint-file"].empty()) {
493    static const char*ips[]={"198.41.0.4", "192.228.79.201", "192.33.4.12", "128.8.10.90", "192.203.230.10", "192.5.5.241", 
494                             "192.112.36.4", "128.63.2.53",
495                             "192.36.148.17","192.58.128.30", "193.0.14.129", "199.7.83.42", "202.12.27.33"};
496    static const char *ip6s[]={
497      "2001:503:ba3e::2:30", NULL, NULL, NULL, NULL,
498      "2001:500:2f::f", NULL, "2001:500:1::803f:235", NULL,
499      "2001:503:c27::2:30", NULL, NULL, NULL
500    };
501    DNSResourceRecord arr, aaaarr, nsrr;
502    arr.qtype=QType::A;
503    aaaarr.qtype=QType::AAAA;
504    nsrr.qtype=QType::NS;
505    arr.ttl=aaaarr.ttl=nsrr.ttl=time(0)+3600000;
506   
507    for(char c='a';c<='m';++c) {
508      static char templ[40];
509      strncpy(templ,"a.root-servers.net.", sizeof(templ) - 1);
510      *templ=c;
511      aaaarr.qname=arr.qname=nsrr.content=templ;
512      arr.content=ips[c-'a'];
513      set<DNSResourceRecord> aset;
514      aset.insert(arr);
515      RC.replace(time(0), string(templ), QType(QType::A), aset, true); // auth, nuke it all
516      if (ip6s[c-'a'] != NULL) {
517        aaaarr.content=ip6s[c-'a'];
518
519        set<DNSResourceRecord> aaaaset;
520        aaaaset.insert(aaaarr);
521        RC.replace(time(0), string(templ), QType(QType::AAAA), aaaaset, true);
522      }
523     
524      nsset.insert(nsrr);
525    }
526  }
527  else {
528    ZoneParserTNG zpt(::arg()["hint-file"]);
529    DNSResourceRecord rr;
530
531    while(zpt.get(rr)) {
532      rr.ttl+=time(0);
533      if(rr.qtype.getCode()==QType::A) {
534        set<DNSResourceRecord> aset;
535        aset.insert(rr);
536        RC.replace(time(0), rr.qname, QType(QType::A), aset, true); // auth, etc see above
537      } else if(rr.qtype.getCode()==QType::AAAA) {
538        set<DNSResourceRecord> aaaaset;
539        aaaaset.insert(rr);
540        RC.replace(time(0), rr.qname, QType(QType::AAAA), aaaaset, true);
541      } else if(rr.qtype.getCode()==QType::NS) {
542        rr.content=toLower(rr.content);
543        nsset.insert(rr);
544      }
545    }
546  }
547  RC.replace(time(0),".", QType(QType::NS), nsset, true); // and stuff in the cache (auth)
548}
549
550map<ComboAddress, uint32_t> g_tcpClientCounts;
551
552struct TCPConnection
553{
554  int fd;
555  enum stateenum {BYTE0, BYTE1, GETQUESTION, DONE} state;
556  int qlen;
557  int bytesread;
558  ComboAddress remote;
559  char data[65535];
560  time_t startTime;
561
562  static void closeAndCleanup(int fd, const ComboAddress& remote) 
563  {
564    Utility::closesocket(fd);
565    if(!g_tcpClientCounts[remote]--) 
566      g_tcpClientCounts.erase(remote);
567    s_currentConnections--;
568  }
569  void closeAndCleanup()
570  {
571    closeAndCleanup(fd, remote);
572  }
573  static unsigned int s_currentConnections; //!< total number of current TCP connections
574};
575
576unsigned int TCPConnection::s_currentConnections; 
577void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
578
579void startDoResolve(void *p)
580{
581  DNSComboWriter* dc=(DNSComboWriter *)p;
582
583  try {
584    uint16_t maxudpsize=512;
585    EDNSOpts edo;
586    if(getEDNSOpts(dc->d_mdp, &edo)) {
587      maxudpsize=max(edo.d_packetsize, (uint16_t)1280);
588    }
589   
590    vector<DNSResourceRecord> ret;
591    vector<uint8_t> packet;
592
593    DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass); 
594
595    pw.getHeader()->aa=0;
596    pw.getHeader()->ra=1;
597    pw.getHeader()->qr=1;
598    pw.getHeader()->tc=0;
599    pw.getHeader()->id=dc->d_mdp.d_header.id;
600    pw.getHeader()->rd=dc->d_mdp.d_header.rd;
601
602    SyncRes sr(dc->d_now);
603    if(!g_quiet)
604      L<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
605       <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote()<<endl;
606
607    sr.setId(MT->getTid());
608    if(!dc->d_mdp.d_header.rd)
609      sr.setCacheOnly();
610
611    int res;
612
613    if(!t_pdl->get() || !(*t_pdl)->preresolve(dc->d_remote, g_listenSocketsAddresses[dc->d_socket], dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret, res)) {
614       res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
615
616      if(t_pdl->get()) {
617        if(res == RCode::NXDomain)
618          (*t_pdl)->nxdomain(dc->d_remote, g_listenSocketsAddresses[dc->d_socket], dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), ret, res);
619      }
620    }
621    uint32_t minTTL=numeric_limits<uint32_t>::max();
622    if(res<0) {
623      pw.getHeader()->rcode=RCode::ServFail;
624      // no commit here, because no record
625      g_stats.servFails++;
626    }
627    else {
628      pw.getHeader()->rcode=res;
629      switch(res) {
630      case RCode::ServFail:
631        g_stats.servFails++;
632        break;
633      case RCode::NXDomain:
634        g_stats.nxDomains++;
635        break;
636      case RCode::NoError:
637        g_stats.noErrors++;
638        break;
639      }
640     
641
642      if(ret.size()) {
643        shuffle(ret);
644       
645//      for(int n=0; n< 50 && packet.size() < 65506; ++n)
646        for(vector<DNSResourceRecord>::const_iterator i=ret.begin(); i!=ret.end(); ++i) {
647          pw.startRecord(i->qname, i->qtype.getCode(), i->ttl, i->qclass, (DNSPacketWriter::Place)i->d_place); 
648          minTTL = min(minTTL, i->ttl);
649          if(i->qtype.getCode() == QType::A) { // blast out A record w/o doing whole dnswriter thing
650            uint32_t ip=0;
651            IpToU32(i->content, &ip);
652            pw.xfr32BitInt(htonl(ip));
653          } else {
654            shared_ptr<DNSRecordContent> drc(DNSRecordContent::mastermake(i->qtype.getCode(), i->qclass, i->content)); 
655            drc->toPacket(pw);
656          }
657          if(!dc->d_tcp && pw.size() > maxudpsize) {
658            pw.rollback();
659            if(i->d_place==DNSResourceRecord::ANSWER)  // only truncate if we actually omitted parts of the answer
660              pw.getHeader()->tc=1;
661            goto sendit; // need to jump over pw.commit
662          }
663        }
664
665      pw.commit();
666      }
667    }
668  sendit:;
669    if(!dc->d_tcp) {
670      sendto(dc->d_socket, (const char*)&*packet.begin(), packet.size(), 0, (struct sockaddr *)(&dc->d_remote), dc->d_remote.getSocklen());
671      if(!SyncRes::s_nopacketcache) {
672        g_packetCache.insertResponsePacket(string((const char*)&*packet.begin(), packet.size()), g_now.tv_sec, 
673                                           min(minTTL, 
674                                               pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl : SyncRes::s_packetcachettl
675                                               )
676                                          );
677      }
678    }
679    else {
680      char buf[2];
681      buf[0]=packet.size()/256;
682      buf[1]=packet.size()%256;
683
684      Utility::iovec iov[2];
685
686      iov[0].iov_base=(void*)buf;              iov[0].iov_len=2;
687      iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
688
689      int ret=Utility::writev(dc->d_socket, iov, 2);
690      bool hadError=true;
691
692      if(ret == 0) 
693        L<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
694      else if(ret < 0 ) 
695        L<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
696      else if((unsigned int)ret != 2 + packet.size())
697        L<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<ret<<")"<<endl;
698      else
699        hadError=false;
700     
701      // update tcp connection status, either by closing or moving to 'BYTE0'
702   
703      if(hadError) {
704        // no need to remove us from FDM, we weren't there
705        TCPConnection::closeAndCleanup(dc->d_socket, dc->d_remote);
706      }
707      else {
708        TCPConnection tc;
709        tc.fd=dc->d_socket;
710        tc.state=TCPConnection::BYTE0;
711        tc.remote=dc->d_remote;
712        Utility::gettimeofday(&g_now, 0); // needs to be updated
713        tc.startTime=g_now.tv_sec;
714        t_fdm->addReadFD(tc.fd, handleRunningTCPQuestion, tc);
715        t_fdm->setReadTTD(tc.fd, g_now, g_tcpTimeout);
716      }
717    }
718   
719    if(!g_quiet) {
720      L<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
721      L<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
722      sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<<res<<endl;
723    }
724
725    sr.d_outqueries ? RC.cacheMisses++ : RC.cacheHits++; 
726    float spent=makeFloat(sr.d_now-dc->d_now);
727    if(spent < 0.001)
728      g_stats.answers0_1++;
729    else if(spent < 0.010)
730      g_stats.answers1_10++;
731    else if(spent < 0.1)
732      g_stats.answers10_100++;
733    else if(spent < 1.0)
734      g_stats.answers100_1000++;
735    else
736      g_stats.answersSlow++;
737
738    uint64_t newLat=(uint64_t)(spent*1000000);
739    if(newLat < 1000000)  // outliers of several minutes exist..
740      g_stats.avgLatencyUsec=(uint64_t)((1-0.0001)*g_stats.avgLatencyUsec + 0.0001*newLat);
741
742    delete dc;
743  }
744  catch(AhuException &ae) {
745    L<<Logger::Error<<"startDoResolve problem: "<<ae.reason<<endl;
746  }
747  catch(MOADNSException& e) {
748    L<<Logger::Error<<"DNS parser error: "<<dc->d_mdp.d_qname<<", "<<e.what()<<endl;
749  }
750  catch(std::exception& e) {
751    L<<Logger::Error<<"STL error: "<<e.what()<<endl;
752  }
753  catch(...) {
754    L<<Logger::Error<<"Any other exception in a resolver context"<<endl;
755  }
756}
757
758RecursorControlChannel s_rcc;
759
760void makeControlChannelSocket()
761{
762  string sockname=::arg()["socket-dir"]+"/pdns_recursor.controlsocket";
763  if(::arg().mustDo("fork")) {
764    sockname+="."+lexical_cast<string>(Utility::getpid());
765    L<<Logger::Warning<<"Forked control socket name: "<<sockname<<endl;
766  }
767  s_rcc.listen(sockname);
768}
769
770void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
771{
772  TCPConnection* conn=any_cast<TCPConnection>(&var);
773
774  if(conn->state==TCPConnection::BYTE0) {
775    int bytes=recv(conn->fd, conn->data, 2, 0);
776    if(bytes==1)
777      conn->state=TCPConnection::BYTE1;
778    if(bytes==2) { 
779      conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
780      conn->bytesread=0;
781      conn->state=TCPConnection::GETQUESTION;
782    }
783    if(!bytes || bytes < 0) {
784      TCPConnection tmp(*conn); 
785      t_fdm->removeReadFD(fd);
786      tmp.closeAndCleanup();
787      return;
788    }
789  }
790  else if(conn->state==TCPConnection::BYTE1) {
791    int bytes=recv(conn->fd, conn->data+1, 1, 0);
792    if(bytes==1) {
793      conn->state=TCPConnection::GETQUESTION;
794      conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
795      conn->bytesread=0;
796    }
797    if(!bytes || bytes < 0) {
798      if(g_logCommonErrors)
799        L<<Logger::Error<<"TCP client "<< conn->remote.toString() <<" disconnected after first byte"<<endl;
800      TCPConnection tmp(*conn); 
801      t_fdm->removeReadFD(fd);
802      tmp.closeAndCleanup();  // conn loses validity here..
803      return;
804    }
805  }
806  else if(conn->state==TCPConnection::GETQUESTION) {
807    int bytes=recv(conn->fd, conn->data + conn->bytesread, conn->qlen - conn->bytesread, 0);
808    if(!bytes || bytes < 0) {
809      L<<Logger::Error<<"TCP client "<< conn->remote.toString() <<" disconnected while reading question body"<<endl;
810      TCPConnection tmp(*conn);
811      t_fdm->removeReadFD(fd);
812      tmp.closeAndCleanup();  // conn loses validity here..
813
814      return;
815    }
816    conn->bytesread+=bytes;
817    if(conn->bytesread==conn->qlen) {
818      TCPConnection tconn(*conn); 
819      t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
820
821      DNSComboWriter* dc=0;
822      try {
823        dc=new DNSComboWriter(tconn.data, tconn.qlen, g_now);
824      }
825      catch(MOADNSException &mde) {
826        g_stats.clientParseError++; 
827        if(g_logCommonErrors)
828          L<<Logger::Error<<"Unable to parse packet from TCP client "<< tconn.remote.toString() <<endl;
829        tconn.closeAndCleanup();
830        return;
831      }
832     
833      dc->setSocket(tconn.fd);
834      dc->d_tcp=true;
835      dc->setRemote(&tconn.remote);
836      if(dc->d_mdp.d_header.qr) {
837        delete dc;
838        L<<Logger::Error<<"Ignoring answer on server socket!"<<endl;
839        tconn.closeAndCleanup();
840        return;
841      }
842      else {
843        ++g_stats.qcounter;
844        ++g_stats.tcpqcounter;
845        MT->makeThread(startDoResolve, dc); // deletes dc
846        return;
847      }
848    }
849  }
850}
851
852//! Handle new incoming TCP connection
853void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
854{
855  ComboAddress addr;
856  socklen_t addrlen=sizeof(addr);
857  int newsock=(int)accept(fd, (struct sockaddr*)&addr, &addrlen);
858  if(newsock>0) {
859    g_stats.addRemote(addr);
860    if(g_allowFrom && !g_allowFrom->match(&addr)) {
861      if(!g_quiet) 
862        L<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
863
864      g_stats.unauthorizedTCP++;
865      Utility::closesocket(newsock);
866      return;
867    }
868   
869    if(g_maxTCPPerClient && g_tcpClientCounts.count(addr) && g_tcpClientCounts[addr] >= g_maxTCPPerClient) {
870      g_stats.tcpClientOverflow++;
871      Utility::closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
872      return;
873    }
874    g_tcpClientCounts[addr]++;
875    Utility::setNonBlocking(newsock);
876    TCPConnection tc;
877    tc.fd=newsock;
878    tc.state=TCPConnection::BYTE0;
879    tc.remote=addr;
880    tc.startTime=g_now.tv_sec;
881    TCPConnection::s_currentConnections++;
882    t_fdm->addReadFD(tc.fd, handleRunningTCPQuestion, tc);
883
884    struct timeval now;
885    Utility::gettimeofday(&now, 0);
886    t_fdm->setReadTTD(tc.fd, now, g_tcpTimeout);
887  }
888}
889 
890
891
892void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
893{
894  //  static HTimer s_timer("udp new question processing");
895  //  HTimerSentinel hts=s_timer.getSentinel();
896  int len;
897  char data[1500];
898  ComboAddress fromaddr;
899  socklen_t addrlen=sizeof(fromaddr);
900
901  if((len=recvfrom(fd, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen)) >= 0) {
902    g_stats.addRemote(fromaddr);
903
904    if(g_allowFrom && !g_allowFrom->match(&fromaddr)) {
905      if(!g_quiet) 
906        L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
907
908      g_stats.unauthorizedUDP++;
909      return;
910    }
911    try {
912      dnsheader* dh=(dnsheader*)data;
913     
914      if(dh->qr) {
915        if(g_logCommonErrors)
916          L<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
917      }
918      else {
919        ++g_stats.qcounter;
920
921        string response;
922        if(!SyncRes::s_nopacketcache && g_packetCache.getResponsePacket(string(data, len), g_now.tv_sec, &response)) {
923          if(!g_quiet)
924            L<<Logger::Error<<t_id<< " question answered from packet cache from "<<fromaddr.toString()<<endl;
925
926          g_stats.packetCacheHits++;
927          SyncRes::s_queries++;
928          sendto(fd, response.c_str(), response.length(), 0, (struct sockaddr*) &fromaddr, fromaddr.getSocklen());
929          return;
930        }
931
932        DNSComboWriter* dc = new DNSComboWriter(data, len, g_now);
933        dc->setSocket(fd);
934        dc->setRemote(&fromaddr);
935
936        dc->d_tcp=false;
937
938        MT->makeThread(startDoResolve, (void*) dc); // deletes dc
939      }
940    }
941    catch(MOADNSException& mde) {
942      g_stats.clientParseError++; 
943      if(g_logCommonErrors)
944        L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
945    }
946  }
947}
948
// (socket, callback) pairs. makeTCPServerSockets() and makeUDPServerSockets() append
// one entry per listening socket they create.
// NOTE(review): presumably consumed later to register the sockets with the
// multiplexer - the consumer is not in this part of the file, confirm.
949typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
950deferredAdd_t deferredAdd;
951
952void makeTCPServerSockets()
953{
954  int fd;
955  vector<string>locals;
956  stringtok(locals,::arg()["local-address"]," ,");
957
958  if(locals.empty())
959    throw AhuException("No local address specified");
960 
961  for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
962    ServiceTuple st;
963    st.port=::arg().asNum("local-port");
964    parseService(*i, st);
965   
966    ComboAddress sin;
967
968    memset((char *)&sin,0, sizeof(sin));
969    sin.sin4.sin_family = AF_INET;
970    if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
971      sin.sin6.sin6_family = AF_INET6;
972      if(Utility::inet_pton(AF_INET6, st.host.c_str(), &sin.sin6.sin6_addr) <= 0)
973        throw AhuException("Unable to resolve local address for TCP server on '"+ st.host +"'"); 
974    }
975
976    fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
977    if(fd<0) 
978      throw AhuException("Making a TCP server socket for resolver: "+stringerror());
979
980    int tmp=1;
981    if(setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&tmp,sizeof tmp)<0) {
982      L<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
983      exit(1);
984    }
985   
986#ifdef TCP_DEFER_ACCEPT
987    if(setsockopt(fd, SOL_TCP,TCP_DEFER_ACCEPT,(char*)&tmp,sizeof tmp) >= 0) {
988      if(i==locals.begin())
989        L<<Logger::Error<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
990    }
991#endif
992
993    sin.sin4.sin_port = htons(st.port);
994    int socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
995    if (::bind(fd, (struct sockaddr *)&sin, socklen )<0) 
996      throw AhuException("Binding TCP server socket for "+ st.host +": "+stringerror());
997   
998    Utility::setNonBlocking(fd);
999    setSendBuffer(fd, 65000);
1000    listen(fd, 128);
1001    deferredAdd.push_back(make_pair(fd, handleNewTCPQuestion));
1002    g_tcpListenSockets.push_back(fd);
1003
1004    if(sin.sin4.sin_family == AF_INET) 
1005      L<<Logger::Error<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
1006    else
1007      L<<Logger::Error<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
1008  }
1009}
1010
1011
1012
1013void makeUDPServerSockets()
1014{
1015  vector<string>locals;
1016  stringtok(locals,::arg()["local-address"]," ,");
1017
1018  if(locals.empty())
1019    throw AhuException("No local address specified");
1020 
1021  if(::arg()["local-address"]=="0.0.0.0") {
1022    L<<Logger::Warning<<"It is advised to bind to explicit addresses with the --local-address option"<<endl;
1023  }
1024
1025  for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
1026    ServiceTuple st;
1027    st.port=::arg().asNum("local-port");
1028    parseService(*i, st);
1029
1030    ComboAddress sin;
1031
1032    memset(&sin, 0, sizeof(sin));
1033    sin.sin4.sin_family = AF_INET;
1034    if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
1035      sin.sin6.sin6_family = AF_INET6;
1036      if(Utility::inet_pton(AF_INET6, st.host.c_str(), &sin.sin6.sin6_addr) <= 0)
1037        throw AhuException("Unable to resolve local address for UDP server on '"+ st.host +"'"); 
1038    }
1039   
1040    int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
1041
1042    if(fd < 0) {
1043      throw AhuException("Making a UDP server socket for resolver: "+netstringerror());
1044    }
1045
1046    setReceiveBuffer(fd, 200000);
1047    sin.sin4.sin_port = htons(st.port);
1048
1049    int socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
1050    if (::bind(fd, (struct sockaddr *)&sin, socklen)<0) 
1051      throw AhuException("Resolver binding to server socket on port "+ lexical_cast<string>(st.port) +" for "+ st.host+": "+stringerror());
1052   
1053    Utility::setNonBlocking(fd);
1054
1055    deferredAdd.push_back(make_pair(fd, handleNewUDPQuestion));
1056    g_listenSocketsAddresses[fd]=sin;  // this is written to only from the startup thread, not from the workers
1057    if(sin.sin4.sin_family == AF_INET) 
1058      L<<Logger::Error<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
1059    else
1060      L<<Logger::Error<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
1061  }
1062}
1063
1064
1065#ifndef WIN32
1066void daemonize(void)
1067{
1068  if(fork())
1069    exit(0); // bye bye
1070 
1071  setsid(); 
1072
1073  int i=open("/dev/null",O_RDWR); /* open stdin */
1074  if(i < 0) 
1075    L<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
1076  else {
1077    dup2(i,0); /* stdin */
1078    dup2(i,1); /* stderr */
1079    dup2(i,2); /* stderr */
1080    close(i);
1081  }
1082}
1083#endif
1084
// NOTE(review): no use of 'counter' is visible in this part of the file - confirm it is still needed.
1085uint64_t counter;
// Raised by usr1Handler(); doStats() consults it when it has nothing to report and clears it on the way out.
// NOTE(review): written from a signal handler as a plain bool - consider volatile sig_atomic_t.
1086bool statsWanted;
1087
1088
// Signal handler (presumably for SIGUSR1, going by the name - the registration is not
// in this part of the file). Only raises the statsWanted flag; the int argument is ignored.
1089void usr1Handler(int)
1090{
1091  statsWanted=true;
1092}
1093
1094
1095
// Signal handler (presumably for SIGUSR2, going by the name - the registration is not
// in this part of the file). Turns on SyncRes logging, clears g_quiet and sets the
// "quiet" argument to "no". The int argument is ignored.
// NOTE(review): SyncRes::setLog() and ::arg().set() are called from signal context;
// whether they are async-signal-safe cannot be told from here - confirm.
1096void usr2Handler(int)
1097{
1098  SyncRes::setLog(true);
1099  g_quiet=false;
1100  ::arg().set("quiet")="no";
1101
1102}
1103
1104void doStats(void)
1105{
1106  if(g_stats.qcounter && (RC.cacheHits + RC.cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
1107    //L<<Logger::Warning<<"stats: "<<g_stats.qcounter<<" questions, "<<RC.size()<<" cache entries, "<<SyncRes::s_negcache.size()<<" negative entries, "
1108    L<<Logger::Warning<<"stats: " <<(int)((RC.cacheHits*100.0)/(RC.cacheHits+RC.cacheMisses))<<"% cache hits"<<endl;
1109    //    L<<Logger::Warning<<"stats: throttle map: "<<SyncRes::s_throttle.size()<<", ns speeds: "
1110    // <<endl; // <<SyncRes::s_nsSpeeds.size()<<endl; // ", bytes: "<<RC.bytes()<<endl;
1111    L<<Logger::Warning<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
1112    L<<Logger::Warning<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
1113     <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
1114    L<<Logger::Warning<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<MT->numProcesses()<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
1115
1116    L<<Logger::Warning<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
1117      g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
1118    L<<Logger::Warning<<"stats: "<<g_stats.packetCacheHits<<" packet cache hits ("<<(int)(100.0*g_stats.packetCacheHits/SyncRes::s_queries) << "%)"<<endl;
1119  }
1120  else if(statsWanted) 
1121    L<<Logger::Warning<<"stats: no stats yet!"<<endl;
1122
1123  //  HTimer::listAll();
1124
1125  statsWanted=false;
1126}
1127
1128static void houseKeeping(void *)
1129try
1130{
1131  static time_t last_stat, last_rootupdate, last_prune;
1132  struct timeval now;
1133  Utility::gettimeofday(&now, 0);
1134
1135  if(now.tv_sec - last_prune > 300) { 
1136    DTime dt;
1137    dt.setTimeval(now);
1138    RC.doPrune();
1139   
1140#if 0
1141    typedef SyncRes::negcache_t::nth_index<1>::type negcache_by_ttd_index_t;
1142    negcache_by_ttd_index_t& ttdindex=boost::multi_index::get<1>(SyncRes::s_negcache);
1143
1144    negcache_by_ttd_index_t::iterator i=ttdindex.lower_bound(now.tv_sec);
1145    ttdindex.erase(ttdindex.begin(), i);
1146
1147    time_t limit=now.tv_sec-300;
1148    for(SyncRes::nsspeeds_t::iterator i = SyncRes::s_nsSpeeds.begin() ; i!= SyncRes::s_nsSpeeds.end(); )
1149      if(i->second.stale(limit))
1150        SyncRes::s_nsSpeeds.erase(i++);
1151      else
1152        ++i;
1153#endif
1154    //   cerr<<"Pruned "<<pruned<<" records, left "<<SyncRes::s_negcache.size()<<"\n";
1155//    cout<<"Prune took "<<dt.udiff()<<"usec\n";
1156    last_prune=time(0);
1157  }
1158  if(now.tv_sec - last_stat>1800) { 
1159    doStats();
1160    last_stat=time(0);
1161  }
1162  if(now.tv_sec - last_rootupdate > 7200) {
1163    SyncRes sr(now);
1164    sr.setDoEDNS0(true);
1165    vector<DNSResourceRecord> ret;
1166
1167    sr.setNoCache();
1168    int res=sr.beginResolve(".", QType(QType::NS), 1, ret);
1169    if(!res) {
1170      L<<Logger::Warning<<"Refreshed . records"<<endl;
1171      last_rootupdate=now.tv_sec;
1172    }
1173    else
1174      L<<Logger::Error<<"Failed to update . records, RCODE="<<res<<endl;
1175  }
1176}
1177catch(AhuException& ae)
1178{
1179  L<<Logger::Error<<"Fatal error: "<<ae.reason<<endl;
1180  throw;
1181}
1182;
1183
1184
1185void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
1186{
1187  string remote;
1188  string msg=s_rcc.recv(&remote);
1189  RecursorControlParser rcp;
1190  RecursorControlParser::func_t* command;
1191  string answer=rcp.getAnswer(msg, &command);
1192  try {
1193    s_rcc.send(answer, &remote);
1194    command();
1195  }
1196  catch(std::exception& e) {
1197    L<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
1198  }
1199  catch(AhuException& ae) {
1200    L<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
1201  }
1202}
1203
1204void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
1205{
1206  PacketID* pident=any_cast<PacketID>(&var);
1207  //  cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
1208
1209  shared_array<char> buffer(new char[pident->inNeeded]);
1210
1211  int ret=recv(fd, buffer.get(), pident->inNeeded,0);
1212  if(ret > 0) {
1213    pident->inMSG.append(&buffer[0], &buffer[ret]);
1214    pident->inNeeded-=ret;
1215    if(!pident->inNeeded) {
1216      //      cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
1217      PacketID pid=*pident;
1218      string msg=pident->inMSG;
1219     
1220      t_fdm->removeReadFD(fd);
1221      MT->sendEvent(pid, &msg); 
1222    }
1223    else {
1224      //      cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
1225    }
1226  }
1227  else {
1228    PacketID tmp=*pident;
1229    t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
1230    string empty;
1231    MT->sendEvent(tmp, &empty); // this conveys error status
1232  }
1233}
1234
1235void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
1236{
1237  PacketID* pid=any_cast<PacketID>(&var);
1238  int ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
1239  if(ret > 0) {
1240    pid->outPos+=ret;
1241    if(pid->outPos==pid->outMSG.size()) {
1242      PacketID tmp=*pid;
1243      t_fdm->removeWriteFD(fd);
1244      MT->sendEvent(tmp, &tmp.outMSG);  // send back what we sent to convey everything is ok
1245    }
1246  }
1247  else {  // error or EOF
1248    PacketID tmp(*pid);
1249    t_fdm->removeWriteFD(fd);
1250    string sent;
1251    MT->sendEvent(tmp, &sent);         // we convey error status by sending empty string
1252  }
1253}
1254
1255// resend event to everybody chained onto it
1256void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
1257{
1258  if(iter->key.chain.empty())
1259    return;
1260  //  cerr<<"doResends called!\n";
1261  for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
1262    resend.fd=-1;
1263    resend.id=*i;
1264    //    cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
1265
1266    MT->sendEvent(resend, &content);
1267    g_stats.chainResends++;
1268  }
1269}
1270
// Multiplexer callback: a datagram arrived on one of our outgoing UDP sockets.
//
// 'var' carries the PacketID that was registered for this fd. The flow is:
//  - a read error or a packet shorter than a DNS header returns the socket to
//    g_udpclientsocks and delivers an empty string (the error signal) to the waiter
//    registered under that PacketID and to everything chained onto it;
//  - a packet with the QR bit set is matched to a waiter by remote address, DNS id, fd
//    and, when the packet carries a question, the question name and type;
//  - a packet without the QR bit is logged and ignored.
1271void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
1272{
1273  PacketID pid=any_cast<PacketID>(var);
1274  int len;
1275  char data[1500];
1276  ComboAddress fromaddr;
1277  socklen_t addrlen=sizeof(fromaddr);
1278
1279  len=recvfrom(fd, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen);
1280
1281  if(len < (int)sizeof(dnsheader)) {
1282    if(len < 0)
1283      ; //      cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
1284    else {
1285      g_stats.serverParseError++; 
1286      if(g_logCommonErrors)
1287        L<<Logger::Error<<"Unable to parse packet from remote UDP server "<< sockAddrToString((struct sockaddr_in*) &fromaddr) <<
1288          ": packet smalller than DNS header"<<endl;
1289    }
1290
1291    g_udpclientsocks.returnSocket(fd);
1292    string empty;
1293
1294    MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
1295    if(iter != MT->d_waiters.end()) 
1296      doResends(iter, pid, empty);
1297   
1298    MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
1299    return;
1300  } 
1301
  // copy the header out of the receive buffer before looking at its fields
1302  dnsheader dh;
1303  memcpy(&dh, data, sizeof(dh));
1304 
1305  if(dh.qr) {
1306    PacketID pident;
1307    pident.remote=fromaddr;
1308    pident.id=dh.id;
1309    pident.fd=fd;
1310    if(!dh.qdcount) { // UPC, Nominum, very old BIND on FormErr, NSD
1311      pident.domain.clear();
1312      pident.type = 0;
1313    }
1314    else {
1315      try {
1316        pident.domain=questionExpand(data, len, pident.type); // don't copy this from above - we need to do the actual read
1317      }
1318      catch(std::exception& e) {
        // unparseable question: log and drop the packet; the socket is not returned and no waiter is notified on this path
1319        L<<Logger::Warning<<"Error in packet from "<<sockAddrToString((struct sockaddr_in*) &fromaddr) << ": "<<e.what() << endl;
1320        return;
1321      }
1322    }
1323    string packet;
1324    packet.assign(data, len);
1325
    // waiters chained onto the matching one get the packet first
1326    MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
1327    if(iter != MT->d_waiters.end()) {
1328      doResends(iter, pident, packet);
1329    }
1330
1331  retryWithName:
1332
1333    if(!MT->sendEvent(pident, &packet)) {
1334//      if(g_logCommonErrors)
1335//      L<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toString()<<": "<<pident.type<<endl;
1336      g_stats.unexpectedCount++;
1337     
      // nobody was waiting under this exact key: scan all waiters
1338      for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
        // same fd, remote, type and (case-insensitively) name: record a near miss on that waiter
1339        if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote &&  mthread->key.type == pident.type &&
1340           boost::iequals(pident.domain, mthread->key.domain)) {
1341          mthread->key.nearMisses++;
1342        }
1343
1344        // be a bit paranoid here since we're weakening our matching
        // a response without a question whose id and remote match a waiter: adopt that
        // waiter's name and type, undo the 'unexpected' count and try the delivery again
1345        if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type && 
1346           pident.id  == mthread->key.id && mthread->key.remote == pident.remote) {
1347          //        cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
1348          pident.domain = mthread->key.domain;
1349          pident.type = mthread->key.type;
1350          g_stats.unexpectedCount--;
1351          goto retryWithName;
1352        }
1353      }
1354    }
1355    else if(fd >= 0) {
      // delivered: the socket has done its job
1356      g_udpclientsocks.returnSocket(fd);
1357    }
1358  }
1359  else
1360    L<<Logger::Warning<<"Ignoring question on outgoing socket from "<< sockAddrToString((struct sockaddr_in*) &fromaddr)  <<endl;
1361}
1362
1363FDMultiplexer* getMultiplexer()
1364{
1365  FDMultiplexer* ret;
1366  for(FDMultiplexer::FDMultiplexermap_t::const_iterator i = FDMultiplexer::getMultiplexerMap().begin();
1367      i != FDMultiplexer::getMultiplexerMap().end(); ++i) {
1368    try {
1369      ret=i->second();
1370      return ret;
1371    }
1372    catch(FDMultiplexerException &fe) {
1373      L<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
1374    }
1375    catch(...) {
1376      L<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
1377    }
1378  }
1379  L<<Logger::Error<<"No working multiplexer found!"<<endl;
1380  exit(1);
1381}
1382
1383static void makeNameToIPZone(const string& hostname, const string& ip)
1384{
1385  SyncRes::AuthDomain ad;
1386  DNSResourceRecord rr;
1387  rr.qname=toCanonic("", hostname);
1388  rr.d_place=DNSResourceRecord::ANSWER;
1389  rr.ttl=86400;
1390  rr.qtype=QType::SOA;
1391  rr.content="localhost. root 1 604800 86400 2419200 604800";
1392 
1393  ad.d_records.insert(rr);
1394
1395  rr.qtype=QType::NS;
1396  rr.content="localhost.";
1397
1398  ad.d_records.insert(rr);
1399 
1400  rr.qtype=QType::A;
1401  rr.content=ip;
1402  ad.d_records.insert(rr);
1403 
1404  if(SyncRes::s_domainmap.count(rr.qname)) {
1405    L<<Logger::Warning<<"Hosts file will not overwrite zone '"<<rr.qname<<"' already loaded"<<endl;
1406  }
1407  else {
1408    L<<Logger::Warning<<"Inserting forward zone '"<<rr.qname<<"' based on hosts file"<<endl;
1409    SyncRes::s_domainmap[rr.qname]=ad;
1410  }
1411}
1412
1413//! parts[0] must be an IP address, the rest must be host names
1414static void makeIPToNamesZone(const vector<string>& parts) 
1415{
1416  string address=parts[0];
1417  vector<string> ipparts;
1418  stringtok(ipparts, address,".");
1419 
1420  SyncRes::AuthDomain ad;
1421  DNSResourceRecord rr;
1422  for(int n=ipparts.size()-1; n>=0 ; --n) {
1423    rr.qname.append(ipparts[n]);
1424    rr.qname.append(1,'.');
1425  }
1426  rr.qname.append("in-addr.arpa.");
1427
1428  rr.d_place=DNSResourceRecord::ANSWER;
1429  rr.ttl=86400;
1430  rr.qtype=QType::SOA;
1431  rr.content="localhost. root. 1 604800 86400 2419200 604800";
1432 
1433  ad.d_records.insert(rr);
1434
1435  rr.qtype=QType::NS;
1436  rr.content="localhost.";
1437
1438  ad.d_records.insert(rr);
1439  rr.qtype=QType::PTR;
1440
1441  if(ipparts.size()==4)  // otherwise this is a partial zone
1442    for(unsigned int n=1; n < parts.size(); ++n) {
1443      rr.content=toCanonic("", parts[n]);
1444      ad.d_records.insert(rr);
1445    }
1446
1447  if(SyncRes::s_domainmap.count(rr.qname)) {
1448    L<<Logger::Warning<<"Will not overwrite zone '"<<rr.qname<<"' already loaded"<<endl;
1449  }
1450  else {
1451    if(ipparts.size()==4)
1452      L<<Logger::Warning<<"Inserting reverse zone '"<<rr.qname<<"' based on hosts file"<<endl;
1453    SyncRes::s_domainmap[rr.qname]=ad;
1454  }
1455}
1456
1457
1458void parseAuthAndForwards();
1459
1460/* mission in life: parse three cases
1461   1) 1.2.3.4
1462   2) 1.2.3.4:5300
1463   3) 2001::1
1464   4) [2002::1]:53
1465*/
1466
1467ComboAddress parseIPAndPort(const std::string& input, uint16_t port)
1468{
1469  if(input.find(':') == string::npos || input.empty()) // common case
1470    return ComboAddress(input, port);
1471
1472  pair<string,string> both;
1473
1474  try { // case 2
1475    both=splitField(input,':');
1476    uint16_t newport=boost::lexical_cast<uint16_t>(both.second);
1477    return ComboAddress(both.first, newport);
1478  } 
1479  catch(...){}
1480
1481  if(input[0]=='[') { // case 4
1482    both=splitField(input.substr(1),']');
1483    return ComboAddress(both.first, both.second.empty() ? port : boost::lexical_cast<uint16_t>(both.second.substr(1)));
1484  }
1485
1486  return ComboAddress(input, port); // case 3
1487}
1488
1489
1490void convertServersForAD(const std::string& input, SyncRes::AuthDomain& ad, const char* sepa, bool verbose=true)
1491{
1492  vector<string> servers;
1493  stringtok(servers, input, sepa);
1494  ad.d_servers.clear();
1495
1496  for(vector<string>::const_iterator iter = servers.begin(); iter != servers.end(); ++iter) {
1497    if(verbose && iter != servers.begin()) 
1498      L<<", ";
1499
1500    ComboAddress addr=parseIPAndPort(*iter, 53);
1501    if(verbose)
1502      L<<addr.toStringWithPort();
1503    ad.d_servers.push_back(addr);
1504  }
1505  if(verbose)
1506    L<<endl;
1507}
1508
1509string reloadAuthAndForwards()
1510{
1511  SyncRes::domainmap_t original=SyncRes::s_domainmap;
1512 
1513  try {
1514    L<<Logger::Warning<<"Reloading zones, purging data from cache"<<endl;
1515 
1516    for(SyncRes::domainmap_t::const_iterator i = SyncRes::s_domainmap.begin(); i != SyncRes::s_domainmap.end(); ++i) {
1517      for(SyncRes::AuthDomain::records_t::const_iterator j = i->second.d_records.begin(); j != i->second.d_records.end(); ++j) 
1518        RC.doWipeCache(j->qname);
1519    }
1520
1521    string configname=::arg()["config-dir"]+"/recursor.conf";
1522    cleanSlashes(configname);
1523   
1524    if(!::arg().preParseFile(configname.c_str(), "forward-zones")) 
1525      L<<Logger::Warning<<"Unable to re-parse configuration file '"<<configname<<"'"<<endl;
1526   
1527    ::arg().preParseFile(configname.c_str(), "auth-zones");
1528    ::arg().preParseFile(configname.c_str(), "export-etc-hosts", "off");
1529    ::arg().preParseFile(configname.c_str(), "serve-rfc1918");
1530
1531    parseAuthAndForwards();
1532   
1533    // purge again - new zones need to blank out the cache
1534    for(SyncRes::domainmap_t::const_iterator i = SyncRes::s_domainmap.begin(); i != SyncRes::s_domainmap.end(); ++i) {
1535      for(SyncRes::AuthDomain::records_t::const_iterator j = i->second.d_records.begin(); j != i->second.d_records.end(); ++j) 
1536        RC.doWipeCache(j->qname);
1537    }
1538
1539    // this is pretty blunt
1540    Lock l(&SyncRes::s_negcachelock);
1541    SyncRes::s_negcache.clear(); 
1542    return "ok\n";
1543  }
1544  catch(std::exception& e) {
1545    L<<Logger::Error<<"Had error reloading zones, keeping original data: "<<e.what()<<endl;
1546  }
1547  catch(AhuException& ae) {
1548    L<<Logger::Error<<"Encountered error reloading zones, keeping original data: "<<ae.reason<<endl;
1549  }
1550  catch(...) {
1551    L<<Logger::Error<<"Encountered unknown error reloading zones, keeping original data"<<endl;
1552  }
1553  SyncRes::s_domainmap.swap(original);
1554  return "reloading failed, see log\n";
1555}
1556
1557void parseAuthAndForwards()
1558{
1559  SyncRes::s_domainmap.clear(); // this makes us idempotent
1560
1561  TXTRecordContent::report();
1562  OPTRecordContent::report();
1563
1564  typedef vector<string> parts_t;
1565  parts_t parts; 
1566  const char *option_names[3]={"auth-zones", "forward-zones", "forward-zones-recurse"};
1567  for(int n=0; n < 3 ; ++n ) {
1568    parts.clear();
1569    stringtok(parts, ::arg()[option_names[n]], ",\t\n\r");
1570    for(parts_t::const_iterator iter = parts.begin(); iter != parts.end(); ++iter) {
1571      SyncRes::AuthDomain ad;
1572      pair<string,string> headers=splitField(*iter, '=');
1573      trim(headers.first);
1574      trim(headers.second);
1575      headers.first=toCanonic("", headers.first);
1576      if(n==0) {
1577        L<<Logger::Error<<"Parsing authoritative data for zone '"<<headers.first<<"' from file '"<<headers.second<<"'"<<endl;
1578        ZoneParserTNG zpt(headers.second, headers.first);
1579        DNSResourceRecord rr;
1580        while(zpt.get(rr)) {
1581          try {
1582            string tmp=DNSRR2String(rr);
1583            rr=String2DNSRR(rr.qname, rr.qtype, tmp, rr.ttl);
1584          }
1585          catch(std::exception &e) {
1586            throw AhuException("Error parsing record '"+rr.qname+"' of type "+rr.qtype.getName()+" in zone '"+headers.first+"' from file '"+headers.second+"': "+e.what());
1587          }
1588          catch(...) {
1589            throw AhuException("Error parsing record '"+rr.qname+"' of type "+rr.qtype.getName()+" in zone '"+headers.first+"' from file '"+headers.second+"'");
1590          }
1591
1592          ad.d_records.insert(rr);
1593        }
1594      }
1595      else {
1596        L<<Logger::Error<<"Redirecting queries for zone '"<<headers.first<<"' ";
1597        if(n == 2) {
1598          L<<"with recursion ";
1599          ad.d_rdForward = 1;
1600        }
1601        else ad.d_rdForward = 0;
1602        L<<"to: ";
1603       
1604        convertServersForAD(headers.second, ad, ";");
1605        if(n == 2) {
1606          ad.d_rdForward = 1;
1607        }
1608      }
1609     
1610      SyncRes::s_domainmap[headers.first]=ad;
1611    }
1612  }
1613 
1614  if(!::arg()["forward-zones-file"].empty()) {
1615    L<<Logger::Warning<<"Reading zone forwarding information from '"<<::arg()["forward-zones-file"]<<"'"<<endl;
1616    SyncRes::AuthDomain ad;
1617    FILE *rfp=fopen(::arg()["forward-zones-file"].c_str(), "r");
1618
1619    if(!rfp)
1620      throw AhuException("Error opening forward-zones-file '"+::arg()["forward-zones-file"]+"': "+stringerror());
1621
1622    shared_ptr<FILE> fp=shared_ptr<FILE>(rfp, fclose);
1623   
1624    char line[1024];
1625    int linenum=0;
1626    uint64_t before = SyncRes::s_domainmap.size();
1627    while(linenum++, fgets(line, sizeof(line)-1, fp.get())) {
1628      string domain, instructions;
1629      tie(domain, instructions)=splitField(line, '=');
1630      trim(domain);
1631      trim(instructions);
1632      if(boost::starts_with(domain,"+")) {
1633        domain=domain.c_str()+1;
1634        ad.d_rdForward = true;
1635      }
1636      else
1637        ad.d_rdForward = false;
1638      if(domain.empty()) 
1639        throw AhuException("Error parsing line "+lexical_cast<string>(linenum)+" of " +::arg()["forward-zones-file"]);
1640
1641      try {
1642        convertServersForAD(instructions, ad, ",; ", false);
1643      }
1644      catch(...) {
1645        throw AhuException("Conversion error parsing line "+lexical_cast<string>(linenum)+" of " +::arg()["forward-zones-file"]);
1646      }
1647
1648      SyncRes::s_domainmap[toCanonic("", domain)]=ad;
1649    }
1650    L<<Logger::Warning<<"Done parsing " << SyncRes::s_domainmap.size() - before<<" forwarding instructions from file '"<<::arg()["forward-zones-file"]<<"'"<<endl;
1651  }
1652
1653  if(::arg().mustDo("export-etc-hosts")) {
1654    string line;
1655    string fname=::arg()["etc-hosts-file"];
1656   
1657    ifstream ifs(fname.c_str());
1658    if(!ifs) {
1659      L<<Logger::Warning<<"Could not open /etc/hosts for reading"<<endl;
1660      return;
1661    }
1662   
1663    string::size_type pos;
1664    while(getline(ifs,line)) {
1665      pos=line.find('#');
1666      if(pos!=string::npos)
1667        line.resize(pos);
1668      trim(line);
1669      if(line.empty())
1670        continue;
1671      parts.clear();
1672      stringtok(parts, line, "\t\r\n ");
1673      if(parts[0].find(':')!=string::npos)
1674        continue;
1675     
1676      for(unsigned int n=1; n < parts.size(); ++n)
1677        makeNameToIPZone(parts[n], parts[0]);
1678      makeIPToNamesZone(parts);
1679    }
1680  }
1681  if(::arg().mustDo("serve-rfc1918")) {
1682    L<<Logger::Warning<<"Inserting rfc 1918 private space zones"<<endl;
1683    parts.clear();
1684    parts.push_back("127");
1685    makeIPToNamesZone(parts);
1686    parts[0]="10";
1687    makeIPToNamesZone(parts);
1688
1689    parts[0]="192.168";
1690    makeIPToNamesZone(parts);
1691    for(int n=16; n < 32; n++) {
1692      parts[0]="172."+lexical_cast<string>(n);
1693      makeIPToNamesZone(parts);
1694    }
1695  }
1696}
1697
1698string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
1699{
1700  if(begin != end) 
1701    ::arg().set("lua-dns-script") = *begin;
1702   
1703  g_luaReloadCounter = 0;
1704  return "ok, reload/unload queued\n";
1705} 
1706 
1707
1708void doReloadLuaScript()
1709{
1710  string fname= ::arg()["lua-dns-script"];
1711  try {
1712    if(fname.empty()) {
1713      t_pdl->reset();
1714      L<<Logger::Error<<t_id<<" Unloaded current lua script"<<endl;
1715    }
1716    else {
1717      *t_pdl = shared_ptr<PowerDNSLua>(new PowerDNSLua(fname));
1718    }
1719  }
1720  catch(std::exception& e) {
1721    L<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
1722  }
1723   
1724  L<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
1725}
1726
1727void* recursorThread(void*);
1728
1729void parseACLs()
1730{
1731  static bool l_initialized;
1732  if(l_initialized) {
1733    string configname=::arg()["config-dir"]+"/recursor.conf";
1734    cleanSlashes(configname);
1735   
1736    if(!::arg().preParseFile(configname.c_str(), "allow-from-file")) 
1737      L<<Logger::Warning<<"Unable to re-parse configuration file '"<<configname<<"'"<<endl;
1738   
1739    ::arg().preParseFile(configname.c_str(), "allow-from");
1740  }
1741  l_initialized = true;
1742  if(!::arg()["allow-from-file"].empty()) {
1743    string line;
1744    NetmaskGroup* allowFrom=new NetmaskGroup;
1745    ifstream ifs(::arg()["allow-from-file"].c_str());
1746    if(!ifs) {
1747      throw AhuException("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
1748    }
1749
1750    string::size_type pos;
1751    while(getline(ifs,line)) {
1752      pos=line.find('#');
1753      if(pos!=string::npos)
1754        line.resize(pos);
1755      trim(line);
1756      if(line.empty())
1757        continue;
1758
1759      allowFrom->addMask(line);
1760    }
1761    g_allowFrom = allowFrom;
1762    L<<Logger::Warning<<"Done parsing " << g_allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
1763  }
1764  else if(!::arg()["allow-from"].empty()) {
1765    NetmaskGroup* allowFrom=new NetmaskGroup;
1766    vector<string> ips;
1767    stringtok(ips, ::arg()["allow-from"], ", ");
1768    L<<Logger::Warning<<"Only allowing queries from: ";
1769    for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
1770      allowFrom->addMask(*i);
1771      if(i!=ips.begin())
1772        L<<Logger::Warning<<", ";
1773      L<<Logger::Warning<<*i;
1774    }
1775    L<<Logger::Warning<<endl;
1776    g_allowFrom = allowFrom;
1777  }
1778  else if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
1779    L<<Logger::Error<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
1780}
1781
1782int serviceMain(int argc, char*argv[])
1783{
1784  L.setName("pdns_recursor");
1785
1786  L.setLoglevel((Logger::Urgency)(6)); // info and up
1787
1788  if(!::arg()["logging-facility"].empty()) {
1789    boost::optional<int> val=logFacilityToLOG(::arg().asNum("logging-facility") );
1790    if(val)
1791      theL().setFacility(*val);
1792    else
1793      L<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
1794  }
1795
1796  L<<Logger::Warning<<"PowerDNS recursor "<<VERSION<<" (C) 2001-2009 PowerDNS.COM BV ("<<__DATE__", "__TIME__;
1797#ifdef __GNUC__
1798  L<<", gcc "__VERSION__;
1799#endif // add other compilers here
1800#ifdef _MSC_VER
1801  L<<", MSVC "<<_MSC_VER;
1802#endif
1803  L<<") starting up"<<endl;
1804 
1805  L<<Logger::Warning<<"PowerDNS comes with ABSOLUTELY NO WARRANTY. "
1806    "This is free software, and you are welcome to redistribute it "
1807    "according to the terms of the GPL version 2."<<endl;
1808 
1809  L<<Logger::Warning<<"Operating in "<<(sizeof(unsigned long)*8) <<" bits mode"<<endl;
1810 
1811  seedRandom(::arg()["entropy-source"]);
1812
1813  parseACLs();
1814 
1815  if(!::arg()["dont-query"].empty()) {
1816    g_dontQuery=new NetmaskGroup;
1817    vector<string> ips;
1818    stringtok(ips, ::arg()["dont-query"], ", ");
1819    L<<Logger::Warning<<"Will not send queries to: ";
1820    for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
1821      g_dontQuery->addMask(*i);
1822      if(i!=ips.begin())
1823        L<<Logger::Warning<<", ";
1824      L<<Logger::Warning<<*i;
1825    }
1826    L<<Logger::Warning<<endl;
1827  }
1828
1829  g_quiet=::arg().mustDo("quiet");
1830  if(::arg().mustDo("trace")) {
1831    SyncRes::setLog(true);
1832    ::arg().set("quiet")="no";
1833    g_quiet=false;
1834  }
1835
1836  RC.d_followRFC2181=::arg().mustDo("auth-can-lower-ttl");
1837 
1838  try {
1839    vector<string> addrs; 
1840    if(!::arg()["query-local-address6"].empty()) {
1841      SyncRes::s_doIPv6=true;
1842      L<<Logger::Error<<"Enabling IPv6 transport for outgoing queries"<<endl;
1843     
1844      stringtok(addrs, ::arg()["query-local-address6"], ", ;");
1845      BOOST_FOREACH(const string& addr, addrs) {
1846        g_localQueryAddresses6.push_back(ComboAddress(addr));
1847      }
1848    }
1849    addrs.clear();
1850    stringtok(addrs, ::arg()["query-local-address"], ", ;");
1851    BOOST_FOREACH(const string& addr, addrs) {
1852      g_localQueryAddresses4.push_back(ComboAddress(addr));
1853    }
1854  }
1855  catch(std::exception& e) {
1856    L<<Logger::Error<<"Assigning local query addresses: "<<e.what();
1857    exit(99);
1858  }
1859 
1860  SyncRes::s_noEDNSPing = ::arg().mustDo("disable-edns-ping");
1861  SyncRes::s_noEDNS = ::arg().mustDo("disable-edns");
1862
1863  SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
1864
1865  SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
1866  SyncRes::s_maxcachettl=::arg().asNum("max-cache-ttl");
1867  SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
1868  SyncRes::s_packetcacheservfailttl=::arg().asNum("packetcache-servfail-ttl");
1869  SyncRes::s_serverID=::arg()["server-id"];
1870  if(SyncRes::s_serverID.empty()) {
1871    char tmp[128];
1872    gethostname(tmp, sizeof(tmp)-1);
1873    SyncRes::s_serverID=tmp;
1874  }
1875 
1876  g_networkTimeoutMsec = ::arg().asNum("network-timeout");
1877
1878  parseAuthAndForwards();
1879 
1880 
1881  g_stats.remotes.resize(::arg().asNum("remotes-ringbuffer-entries"));
1882  if(!g_stats.remotes.empty())
1883    memset(&g_stats.remotes[0], 0, g_stats.remotes.size() * sizeof(RecursorStats::remotes_t::value_type));
1884  g_logCommonErrors=::arg().mustDo("log-common-errors");
1885 
1886  makeUDPServerSockets();
1887  makeTCPServerSockets();
1888
1889//  g_mc = new MemcachedCommunicator("127.0.0.1");
1890  //  g_dc = new DHCPCommunicator("10.0.0.11");
1891
1892  s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
1893  if(!s_pidfname.empty())
1894    unlink(s_pidfname.c_str()); // remove possible old pid file
1895 
1896#ifndef WIN32
1897  if(::arg().mustDo("fork")) {
1898    fork();
1899    L<<Logger::Warning<<"This is forked pid "<<getpid()<<endl;
1900  }
1901#endif
1902
1903  primeHints();   
1904  L<<Logger::Warning<<"Done priming cache with root hints"<<endl;
1905#ifndef WIN32
1906  if(::arg().mustDo("daemon")) {
1907    L<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
1908    L.toConsole(Logger::Critical);
1909    daemonize();
1910  }
1911  signal(SIGUSR1,usr1Handler);
1912  signal(SIGUSR2,usr2Handler);
1913  signal(SIGPIPE,SIG_IGN);
1914  writePid();
1915#endif
1916  makeControlChannelSocket();       
1917
1918  if(::arg().asNum("threads")==1) {
1919    L<<Logger::Warning<<"Operating unthreaded"<<endl;
1920    g_singleThreaded=true;
1921    recursorThread(0);
1922  }
1923  else {
1924    pthread_t tid;
1925    L<<Logger::Warning<<"Launching "<<::arg().asNum("threads")<<" threads"<<endl;
1926    for(int n=0; n < ::arg().asNum("threads"); ++n) {
1927      pthread_create(&tid, 0, recursorThread, (void*)n);
1928    }
1929    void* res;
1930    pthread_join(tid, &res);
1931  }
1932  return 0;
1933}
1934
1935void* recursorThread(void* ptr)
1936try
1937{
1938#if 0
1939  DTime dt;
1940  time_t now=time(0);
1941
1942  string templ;
1943  vector<string> names;
1944  for(int n = 0 ; n < 500000; ++n) {
1945    templ="blah"+lexical_cast<string>(n)+".testdomain.com";
1946    names.push_back(templ);
1947  }
1948  random_shuffle(names.begin(), names.end());
1949  cerr<<"go!"<<endl;
1950  dt.set();
1951  for(int n = 0 ; n < 500000; ++n) {
1952    vector<DNSResourceRecord> avect;
1953    RC.get2(now, names[n], QType(QType::AAAA), &avect); // auth, nuke it all
1954  }
1955  cerr<<"get2 secs: "<<dt.udiff()/1000000.0<<endl;
1956#endif
1957  t_id=(int) (long) ptr;
1958 
1959  t_pdl = new shared_ptr<PowerDNSLua>();
1960  g_luaReloadCounter = t_id + 1;
1961  try {
1962    if(!::arg()["lua-dns-script"].empty()) {
1963      *t_pdl = shared_ptr<PowerDNSLua>(new PowerDNSLua(::arg()["lua-dns-script"]));
1964      L<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
1965    }
1966   
1967  }
1968  catch(std::exception &e) {
1969    L<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
1970    exit(99);
1971  }
1972 
1973  MT=new MTasker<PacketID,string>(::arg().asNum("stack-size"));
1974 
1975 
1976  PacketID pident;
1977
1978  t_fdm=getMultiplexer();
1979  if(!t_id) 
1980    L<<Logger::Error<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
1981
1982
1983  for(deferredAdd_t::const_iterator i=deferredAdd.begin(); i!=deferredAdd.end(); ++i) 
1984    t_fdm->addReadFD(i->first, i->second);
1985 
1986  if(!t_id) {
1987    int newgid=0;
1988    if(!::arg()["setgid"].empty())
1989      newgid=Utility::makeGidNumeric(::arg()["setgid"]);
1990    int newuid=0;
1991    if(!::arg()["setuid"].empty())
1992      newuid=Utility::makeUidNumeric(::arg()["setuid"]);
1993 
1994#ifndef WIN32
1995    if (!::arg()["chroot"].empty()) {
1996      if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
1997        L<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
1998        exit(1);
1999      }
2000    }
2001 
2002    Utility::dropPrivs(newuid, newgid);
2003 
2004    t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
2005  }
2006#endif
2007 
2008  counter=0;
2009  unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
2010  g_tcpTimeout=::arg().asNum("client-tcp-timeout");
2011 
2012  g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
2013 
2014 
2015  bool listenOnTCP(true);
2016 
2017  for(;;) {
2018    while(MT->schedule(&g_now)); // housekeeping, let threads do their thing
2019     
2020    if(!t_id && !(counter%500)) {
2021      MT->makeThread(houseKeeping,0);
2022    }
2023
2024    if(!(counter%55)) {
2025      typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
2026      expired_t expired=t_fdm->getTimeouts(g_now);
2027       
2028      for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
2029        TCPConnection conn=any_cast<TCPConnection>(i->second);
2030        if(g_logCommonErrors)
2031          L<<Logger::Warning<<"Timeout from remote TCP client "<< conn.remote.toString() <<endl;
2032        t_fdm->removeReadFD(i->first);
2033        conn.closeAndCleanup();
2034      }
2035    }
2036     
2037    counter++;
2038
2039    if(g_luaReloadCounter == t_id) {
2040      g_luaReloadCounter++;
2041      doReloadLuaScript();
2042    }
2043
2044    if(statsWanted) {
2045      doStats();
2046    }
2047
2048    Utility::gettimeofday(&g_now, 0);
2049    t_fdm->run(&g_now);
2050    // 'run' updates g_now for us
2051
2052    if(listenOnTCP) {
2053      if(TCPConnection::s_currentConnections > maxTcpClients) {  // shutdown, too many connections
2054        for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
2055          t_fdm->removeReadFD(*i);
2056        listenOnTCP=false;
2057      }
2058    }
2059    else {
2060      if(TCPConnection::s_currentConnections <= maxTcpClients) {  // reenable
2061        for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
2062          t_fdm->addReadFD(*i, handleNewTCPQuestion);
2063        listenOnTCP=true;
2064      }
2065    }
2066  }
2067}
2068catch(AhuException &ae) {
2069  L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
2070  return 0;
2071}
2072catch(std::exception &e) {
2073   L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
2074   return 0;
2075}
2076catch(...) {
2077   L<<Logger::Error<<"any other exception in main: "<<endl;
2078   return 0;
2079}
2080
2081#ifdef WIN32
2082void doWindowsServiceArguments(RecursorService& recursor)
2083{
2084  if(::arg().mustDo( "register-service" )) {
2085    if ( !recursor.registerService( "The PowerDNS Recursor.", true )) {
2086      cerr << "Could not register service." << endl;
2087      exit( 99 );
2088    }
2089   
2090    exit( 0 );
2091  }
2092
2093  if ( ::arg().mustDo( "unregister-service" )) {
2094    recursor.unregisterService();
2095    exit( 0 );
2096  }
2097}
2098#endif
2099
2100
2101int main(int argc, char **argv) 
2102{
2103  g_stats.startupTime=time(0);
2104  reportBasicTypes();
2105
2106  int ret = EXIT_SUCCESS;
2107#ifdef WIN32
2108  RecursorService service;
2109  WSADATA wsaData;
2110  if(WSAStartup( MAKEWORD( 2, 2 ), &wsaData )) {
2111    cerr<<"Unable to initialize winsock\n";
2112    exit(1);
2113  }
2114#endif // WIN32
2115
2116  try {
2117    ::arg().set("stack-size","stack size per mthread")="200000";
2118    ::arg().set("soa-minimum-ttl","Don't change")="0";
2119    ::arg().set("soa-serial-offset","Don't change")="0";
2120    ::arg().set("no-shuffle","Don't change")="off";
2121    ::arg().set("aaaa-additional-processing","turn on to do AAAA additional processing (slow)")="off";
2122    ::arg().set("local-port","port to listen on")="53";
2123    ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
2124    ::arg().set("trace","if we should output heaps of logging")="off";
2125    ::arg().set("daemon","Operate as a daemon")="yes";
2126    ::arg().set("log-common-errors","If we should log rather common errors")="yes";
2127    ::arg().set("chroot","switch to chroot jail")="";
2128    ::arg().set("setgid","If set, change group id to this gid for more security")="";
2129    ::arg().set("setuid","If set, change user id to this uid for more security")="";
2130    ::arg().set("network-timeout", "Wait this nummer of milliseconds for network i/o")="1500";
2131    ::arg().set("threads", "Launch this number of threads")="2";
2132#ifdef WIN32
2133    ::arg().set("quiet","Suppress logging of questions and answers")="off";
2134    ::arg().setSwitch( "register-service", "Register the service" )= "no";
2135    ::arg().setSwitch( "unregister-service", "Unregister the service" )= "no";
2136    ::arg().setSwitch( "ntservice", "Run as service" )= "no";
2137    ::arg().setSwitch( "use-ntlog", "Use the NT logging facilities" )= "yes"; 
2138    ::arg().setSwitch( "use-logfile", "Use a log file" )= "no"; 
2139    ::arg().setSwitch( "logfile", "Filename of the log file" )= "recursor.log"; 
2140#else
2141    ::arg().set("quiet","Suppress logging of questions and answers")="";
2142    ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
2143#endif
2144    ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
2145#ifndef WIN32
2146    ::arg().set("socket-owner","Owner of socket")="";
2147    ::arg().set("socket-group","Group of socket")="";
2148    ::arg().set("socket-mode", "Permissions for socket")="";
2149#endif
2150   
2151    ::arg().set("socket-dir","Where the controlsocket will live")=LOCALSTATEDIR;
2152    ::arg().set("delegation-only","Which domains we only accept delegations from")="";
2153    ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
2154    ::arg().set("query-local-address6","Source IPv6 address for sending queries")="";
2155    ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
2156    ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
2157    ::arg().set("hint-file", "If set, load root hints from this file")="";
2158    ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
2159    ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
2160    ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
2161    ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
2162    ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
2163    ::arg().set("server-id", "Returned when queried for 'server.id' TXT or NSID, defaults to hostname")="";
2164    ::arg().set("remotes-ringbuffer-entries", "maximum number of packets to store statistics for")="0";
2165    ::arg().set("version-string", "string reported on version.pdns or version.bind")="PowerDNS Recursor "VERSION" $Id$";
2166    ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")="127.0.0.0/8, 10.0.0.0/8, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fe80::/10";
2167    ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
2168    ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
2169    ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")="127.0.0.0/8, 10.0.0.0/8, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fe80::/10";
2170    ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
2171    ::arg().set("fork", "If set, fork the daemon for possible double performance")="no";
2172    ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
2173    ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
2174    ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
2175    ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
2176    ::arg().set("forward-zones-recurse", "Zones for which we forward queries, comma separated domain=ip pairs")="";
2177    ::arg().set("forward-zones-file", "File with domain=ip pairs for forwarding")="";
2178    ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
2179    ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
2180    ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="";
2181    ::arg().set("auth-can-lower-ttl", "If we follow RFC 2181 to the letter, an authoritative server can lower the TTL of NS records")="off";
2182    ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
2183    ::arg().setSwitch( "ignore-rd-bit", "Assume each packet requires recursion, for compatability" )= "off"; 
2184    ::arg().setSwitch( "disable-edns-ping", "Disable EDNSPing" )= "no"; 
2185    ::arg().setSwitch( "disable-edns", "Disable EDNS" )= ""; 
2186    ::arg().setSwitch( "disable-packetcache", "Disable packetcahe" )= "no"; 
2187
2188    ::arg().setCmd("help","Provide a helpful message");
2189    ::arg().setCmd("version","Print version string ("VERSION")");
2190    ::arg().setCmd("config","Output blank configuration");
2191    L.toConsole(Logger::Info);
2192    ::arg().laxParse(argc,argv); // do a lax parse
2193
2194    string configname=::arg()["config-dir"]+"/recursor.conf";
2195    cleanSlashes(configname);
2196
2197    if(!::arg().file(configname.c_str())) 
2198      L<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
2199
2200    ::arg().parse(argc,argv);
2201
2202    ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
2203
2204    if(::arg().mustDo("help")) {
2205      cerr<<"syntax:"<<endl<<endl;
2206      cerr<<::arg().helpstring(::arg()["help"])<<endl;
2207      exit(99);
2208    }
2209    if(::arg().mustDo("version")) {
2210      cerr<<"version: "VERSION<<endl;
2211      exit(99);
2212    }
2213
2214    if(::arg().mustDo("config")) {
2215      cout<<::arg().configstring()<<endl;
2216      exit(0);
2217    }
2218
2219
2220#ifndef WIN32
2221    serviceMain(argc, argv);
2222#else
2223    doWindowsServiceArguments(service);
2224        L.toNTLog();
2225    RecursorService::instance()->start( argc, argv, ::arg().mustDo( "ntservice" )); 
2226#endif
2227
2228  }
2229  catch(AhuException &ae) {
2230    L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
2231    ret=EXIT_FAILURE;
2232  }
2233  catch(std::exception &e) {
2234    L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
2235    ret=EXIT_FAILURE;
2236  }
2237  catch(...) {
2238    L<<Logger::Error<<"any other exception in main: "<<endl;
2239    ret=EXIT_FAILURE;
2240  }
2241 
2242#ifdef WIN32
2243  WSACleanup();
2244#endif // WIN32
2245
2246  return ret;
2247}
Note: See TracBrowser for help on using the browser.