root/trunk/pdns/pdns/pdns_recursor.cc @ 923

Revision 923, 54.1 KB (checked in by ahu, 7 years ago)

Implement 'dont-query' and enable it by default, which means we no longer send queries to RFC 1918 address space or to 127.0.0.1.

  • Property svn:eol-style set to native
  • Property svn:keywords set to author date id revision
Line 
1/*
2    PowerDNS Versatile Database Driven Nameserver
3    Copyright (C) 2003 - 2006  PowerDNS.COM BV
4
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License version 2
7    as published by the Free Software Foundation
8
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17*/
18
19#ifndef WIN32
20# include <netdb.h>
21# include <unistd.h>
22#else
23 #include "ntservice.hh"
24 #include "recursorservice.hh"
25#endif // WIN32
26
27#include "utility.hh"
28#include <iostream>
29#include <errno.h>
30#include <map>
31#include <set>
32#include "recursor_cache.hh"
33#include <stdio.h>
34#include <signal.h>
35#include <stdlib.h>
36
37#include "mtasker.hh"
38#include <utility>
39#include "arguments.hh"
40#include "syncres.hh"
41#include <fcntl.h>
42#include <fstream>
43#include "sstuff.hh"
44#include <boost/tuple/tuple.hpp>
45#include <boost/tuple/tuple_comparison.hpp>
46#include <boost/shared_array.hpp>
47#include <boost/lexical_cast.hpp>
48#include <boost/function.hpp>
49#include <boost/algorithm/string.hpp>
50#include "dnsparser.hh"
51#include "dnswriter.hh"
52#include "dnsrecords.hh"
53#include "zoneparser-tng.hh"
54#include "rec_channel.hh"
55#include "logger.hh"
56#include "iputils.hh"
57#include "mplexer.hh"
58#include "config.h"
59
60#ifndef RECURSOR
61#include "statbag.hh"
62StatBag S;
63#endif
64
65FDMultiplexer* g_fdm;
66unsigned int g_maxTCPPerClient;
67bool g_logCommonErrors;
68using namespace boost;
69
70#ifdef __FreeBSD__           // see cvstrac ticket #26
71#include <pthread.h>
72#include <semaphore.h>
73#endif
74
75MemRecursorCache RC;
76RecursorStats g_stats;
77bool g_quiet;
78NetmaskGroup* g_allowFrom;
79NetmaskGroup* g_dontQuery;
80string s_programname="pdns_recursor";
81typedef vector<int> g_tcpListenSockets_t;
82g_tcpListenSockets_t g_tcpListenSockets;
83int g_tcpTimeout;
84
85struct DNSComboWriter {
86  DNSComboWriter(const char* data, uint16_t len, const struct timeval& now) : d_mdp(data, len), d_now(now), d_tcp(false), d_socket(-1)
87  {}
88  MOADNSParser d_mdp;
89  void setRemote(ComboAddress* sa)
90  {
91    d_remote=*sa;
92  }
93
94  void setSocket(int sock)
95  {
96    d_socket=sock;
97  }
98
99  string getRemote() const
100  {
101    return d_remote.toString();
102  }
103
104  struct timeval d_now;
105  ComboAddress d_remote;
106  bool d_tcp;
107  int d_socket;
108};
109
110
111#ifndef WIN32
112#ifndef __FreeBSD__
// Do-nothing replacements for the semaphore and pthread mutex entry points:
// every stub ignores its arguments and reports success (0), and
// pthread_self() always yields a zero thread id.
extern "C" {
  int sem_init(sem_t*, int, unsigned int) { return 0; }
  int sem_wait(sem_t*) { return 0; }
  int sem_trywait(sem_t*) { return 0; }
  int sem_post(sem_t*) { return 0; }
  int sem_getvalue(sem_t*, int*) { return 0; }

  pthread_t pthread_self(void) { return (pthread_t) 0; }

  int pthread_mutex_init(pthread_mutex_t*, const pthread_mutexattr_t*) { return 0; }
  int pthread_mutex_lock(pthread_mutex_t*) { return 0; }
  int pthread_mutex_unlock(pthread_mutex_t*) { return 0; }
  int pthread_mutex_destroy(pthread_mutex_t*) { return 0; }
}
125#endif // __FreeBSD__
126#endif // WIN32
127
128ArgvMap &arg()
129{
130  static ArgvMap theArg;
131  return theArg;
132}
133
134struct timeval g_now;
135typedef vector<int> tcpserversocks_t;
136
137MT_t* MT; // the big MTasker
138
139void handleTCPClientWritable(int fd, boost::any& var);
140
141// -1 is error, 0 is timeout, 1 is success
142int asendtcp(const string& data, Socket* sock) 
143{
144  PacketID pident;
145  pident.sock=sock;
146  pident.outMSG=data;
147
148  g_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
149  string packet;
150
151  int ret=MT->waitEvent(pident,&packet,1);
152  if(!ret || ret==-1) { // timeout
153    g_fdm->removeWriteFD(sock->getHandle());
154  }
155  else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
156    return -1;
157  }
158  return ret;
159}
160
161void handleTCPClientReadable(int fd, boost::any& var);
162
163// -1 is error, 0 is timeout, 1 is success
164int arecvtcp(string& data, int len, Socket* sock) 
165{
166  data.clear();
167  PacketID pident;
168  pident.sock=sock;
169  pident.inNeeded=len;
170  g_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
171
172  int ret=MT->waitEvent(pident,&data,1);
173  if(!ret || ret==-1) { // timeout
174    g_fdm->removeReadFD(sock->getHandle());
175  }
176  else if(data.empty()) {// error, EOF or other
177    return -1;
178  }
179
180  return ret;
181}
182
183// returns -1 for errors which might go away, throws for ones that won't
184int makeClientSocket(int family)
185{
186  int ret=(int)socket(family, SOCK_DGRAM, 0);
187  if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
188    return ret;
189
190  if(ret<0) 
191    throw AhuException("Making a socket for resolver: "+stringerror());
192
193  static optional<ComboAddress> sin4;
194  if(!sin4) {
195    sin4=ComboAddress(::arg()["query-local-address"]);
196  }
197  static optional<ComboAddress> sin6;
198  if(!sin6) {
199    if(!::arg()["query-local-address6"].empty())
200    sin6=ComboAddress(::arg()["query-local-address6"]);
201  }
202
203  int tries=10;
204  while(--tries) {
205    uint16_t port=1025+Utility::random()%64510;
206    if(tries==1)  // fall back to kernel 'random'
207        port=0;
208
209    if(family==AF_INET) {
210      sin4->sin4.sin_port = htons(port); 
211     
212      if (::bind(ret, (struct sockaddr *)&*sin4, sin4->getSocklen()) >= 0) 
213        break;
214    }
215    else {
216      sin6->sin6.sin6_port = htons(port); 
217     
218      if (::bind(ret, (struct sockaddr *)&*sin6, sin6->getSocklen()) >= 0) 
219        break;
220    }
221  }
222  if(!tries)
223    throw AhuException("Resolver binding to local query client socket: "+stringerror());
224
225  Utility::setNonBlocking(ret);
226  return ret;
227}
228
229void handleUDPServerResponse(int fd, boost::any&);
230
231// you can ask this class for a UDP socket to send a query from
232// this socket is not yours, don't even think about deleting it
233// but after you call 'returnSocket' on it, don't assume anything anymore
234class UDPClientSocks
235{
236  unsigned int d_numsocks;
237  unsigned int d_maxsocks;
238
239public:
240  UDPClientSocks() : d_numsocks(0), d_maxsocks(5000)
241  {
242  }
243
244  typedef set<int> socks_t;
245  socks_t d_socks;
246
247  // returning -1 means: temporary OS error (ie, out of files)
248  int getSocket(uint16_t family)
249  {
250    int fd=makeClientSocket(family);
251    if(fd < 0) // temporary error - receive exception otherwise
252      return -1;
253
254    d_socks.insert(fd);
255    d_numsocks++;
256    return fd;
257  }
258
259  void returnSocket(int fd)
260  {
261    socks_t::iterator i=d_socks.find(fd);
262    if(i==d_socks.end()) {
263      throw AhuException("Trying to return a socket (fd="+lexical_cast<string>(fd)+") not in the pool");
264    }
265    returnSocket(i);
266  }
267
268  // return a socket to the pool, or simply erase it
269  void returnSocket(socks_t::iterator& i)
270  {
271    if(i==d_socks.end()) {
272      throw AhuException("Trying to return a socket not in the pool");
273    }
274    try {
275      g_fdm->removeReadFD(*i);
276    }
277    catch(FDMultiplexerException& e) {
278      // we sometimes return a socket that has not yet been assigned to g_fdm
279    }
280    Utility::closesocket(*i);
281   
282    d_socks.erase(i++);
283    --d_numsocks;
284  }
285}g_udpclientsocks;
286
287
288/* these two functions are used by LWRes */
289// -2 is OS error, -1 is error that depends on the remote, > 0 is success
290int asendto(const char *data, int len, int flags, 
291            const ComboAddress& toaddr, uint16_t id, const string& domain, uint16_t qtype, int* fd) 
292{
293
294  PacketID pident;
295  pident.domain = domain;
296  pident.remote = toaddr;
297  pident.type = qtype;
298
299  // see if there is an existing outstanding request we can chain on to, using partial equivalence function
300  pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
301
302  for(; chain.first != chain.second; chain.first++) {
303    if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
304      //      cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
305      // cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
306      // <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
307     
308      chain.first->key.chain.insert(id); // we can chain
309      *fd=-1;                            // gets used in waitEvent / sendEvent later on
310      return 1;
311    }
312  }
313
314  *fd=g_udpclientsocks.getSocket(toaddr.sin4.sin_family);
315  if(*fd < 0)
316    return -2;
317
318  pident.fd=*fd;
319  pident.id=id;
320 
321  int ret=connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen());
322  if(ret < 0) {
323    g_udpclientsocks.returnSocket(*fd);
324    if(errno==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
325      return -2;
326    return ret;
327  }
328
329  g_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
330  ret=send(*fd, data, len, 0);
331  if(ret < 0)
332    g_udpclientsocks.returnSocket(*fd);
333  return ret;
334}
335
336// -1 is error, 0 is timeout, 1 is success
337int arecvfrom(char *data, int len, int flags, const ComboAddress& fromaddr, int *d_len, 
338              uint16_t id, const string& domain, uint16_t qtype, int fd, unsigned int now)
339{
340  static optional<unsigned int> nearMissLimit;
341  if(!nearMissLimit) 
342    nearMissLimit=::arg().asNum("spoof-nearmiss-max");
343
344  PacketID pident;
345  pident.fd=fd;
346  pident.id=id;
347  pident.domain=domain;
348  pident.type = qtype;
349  pident.remote=fromaddr;
350
351  string packet;
352  int ret=MT->waitEvent(pident, &packet, 1, now);
353
354  if(ret > 0) {
355    if(packet.empty()) // means "error"
356      return -1; 
357
358    *d_len=(int)packet.size();
359    memcpy(data,packet.c_str(),min(len,*d_len));
360    if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
361      L<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
362      g_stats.spoofCount++;
363      return -1;
364    }
365  }
366  else {
367    if(fd >= 0)
368      g_udpclientsocks.returnSocket(fd);
369  }
370  return ret;
371}
372
373void setBuffer(int fd, int optname, uint32_t size)
374{
375  uint32_t psize=0;
376  socklen_t len=sizeof(psize);
377 
378  if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
379    L<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
380    return; 
381  }
382
383  if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
384    L<<Logger::Error<<"Warning: unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
385}
386
387
388static void setReceiveBuffer(int fd, uint32_t size)
389{
390  setBuffer(fd, SO_RCVBUF, size);
391}
392
393static void setSendBuffer(int fd, uint32_t size)
394{
395  setBuffer(fd, SO_SNDBUF, size);
396}
397
398static void writePid(void)
399{
400  string fname=::arg()["socket-dir"]+"/"+s_programname+".pid";
401  ofstream of(fname.c_str());
402  if(of)
403    of<< Utility::getpid() <<endl;
404  else
405    L<<Logger::Error<<"Requested to write pid for "<<Utility::getpid()<<" to "<<fname<<" failed: "<<strerror(errno)<<endl;
406}
407
408void primeHints(void)
409{
410  // prime root cache
411  set<DNSResourceRecord>nsset;
412
413  if(::arg()["hint-file"].empty()) {
414    static char*ips[]={"198.41.0.4", "192.228.79.201", "192.33.4.12", "128.8.10.90", "192.203.230.10", "192.5.5.241", "192.112.36.4", "128.63.2.53", 
415                       "192.36.148.17","192.58.128.30", "193.0.14.129", "198.32.64.12", "202.12.27.33"};
416    DNSResourceRecord arr, nsrr;
417    arr.qtype=QType::A;
418    arr.ttl=time(0)+3600000;
419    nsrr.qtype=QType::NS;
420    nsrr.ttl=time(0)+3600000;
421   
422    for(char c='a';c<='m';++c) {
423      static char templ[40];
424      strncpy(templ,"a.root-servers.net.", sizeof(templ) - 1);
425      *templ=c;
426      arr.qname=nsrr.content=templ;
427      arr.content=ips[c-'a'];
428      set<DNSResourceRecord> aset;
429      aset.insert(arr);
430      RC.replace(time(0), string(templ), QType(QType::A), aset, true); // auth, nuke it all
431     
432      nsset.insert(nsrr);
433    }
434  }
435  else {
436    ZoneParserTNG zpt(::arg()["hint-file"]);
437    DNSResourceRecord rr;
438    set<DNSResourceRecord> aset;
439
440    while(zpt.get(rr)) {
441      rr.ttl+=time(0);
442      if(rr.qtype.getCode()==QType::A) {
443        set<DNSResourceRecord> aset;
444        aset.insert(rr);
445        RC.replace(time(0), rr.qname, QType(QType::A), aset, true); // auth, etc see above
446      }
447      if(rr.qtype.getCode()==QType::NS) {
448        rr.content=toLower(rr.content);
449        nsset.insert(rr);
450      }
451    }
452  }
453  RC.replace(time(0),".", QType(QType::NS), nsset, true); // and stuff in the cache (auth)
454}
455
456map<ComboAddress, uint32_t> g_tcpClientCounts;
457
458struct TCPConnection
459{
460  int fd;
461  enum stateenum {BYTE0, BYTE1, GETQUESTION, DONE} state;
462  int qlen;
463  int bytesread;
464  ComboAddress remote;
465  char data[65535];
466  time_t startTime;
467
468  static void closeAndCleanup(int fd, const ComboAddress& remote) 
469  {
470    Utility::closesocket(fd);
471    if(!g_tcpClientCounts[remote]--) 
472      g_tcpClientCounts.erase(remote);
473    s_currentConnections--;
474  }
475  void closeAndCleanup()
476  {
477    closeAndCleanup(fd, remote);
478  }
479  static unsigned int s_currentConnections; //!< total number of current TCP connections
480};
481
482unsigned int TCPConnection::s_currentConnections; 
483void handleRunningTCPQuestion(int fd, boost::any& var);
484
485void startDoResolve(void *p)
486{
487  DNSComboWriter* dc=(DNSComboWriter *)p;
488  try {
489    uint16_t maxudpsize=512;
490    MOADNSParser::EDNSOpts edo;
491    if(dc->d_mdp.getEDNSOpts(&edo)) {
492      maxudpsize=edo.d_packetsize;
493    }
494   
495    vector<DNSResourceRecord> ret;
496   
497    vector<uint8_t> packet;
498    DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
499
500    pw.getHeader()->aa=0;
501    pw.getHeader()->ra=1;
502    pw.getHeader()->qr=1;
503    pw.getHeader()->id=dc->d_mdp.d_header.id;
504    pw.getHeader()->rd=dc->d_mdp.d_header.rd;
505
506    SyncRes sr(dc->d_now);
507    if(!g_quiet)
508      L<<Logger::Error<<"["<<MT->getTid()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
509       <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote()<<endl;
510
511    sr.setId(MT->getTid());
512    if(!dc->d_mdp.d_header.rd)
513      sr.setCacheOnly();
514
515    int res=sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
516    if(res<0) {
517      pw.getHeader()->rcode=RCode::ServFail;
518      // no commit here, because no record
519      g_stats.servFails++;
520    }
521    else {
522      pw.getHeader()->rcode=res;
523      switch(res) {
524      case RCode::ServFail:
525        g_stats.servFails++;
526        break;
527      case RCode::NXDomain:
528        g_stats.nxDomains++;
529        break;
530      case RCode::NoError:
531        g_stats.noErrors++;
532        break;
533      }
534     
535      if(ret.size()) {
536        shuffle(ret);
537        for(vector<DNSResourceRecord>::const_iterator i=ret.begin();i!=ret.end();++i) {
538          pw.startRecord(i->qname, i->qtype.getCode(), i->ttl, i->qclass, (DNSPacketWriter::Place)i->d_place);
539          shared_ptr<DNSRecordContent> drc(DNSRecordContent::mastermake(i->qtype.getCode(), i->qclass, i->content));
540         
541          drc->toPacket(pw);
542       
543          if(!dc->d_tcp && pw.size() > maxudpsize) {
544            pw.rollback();
545            if(i->d_place==DNSResourceRecord::ANSWER)  // only truncate if we actually omitted parts of the answer
546              pw.getHeader()->tc=1;
547            goto sendit; // need to jump over pw.commit
548          }
549        }
550        pw.commit();
551      }
552    }
553  sendit:;
554    if(!dc->d_tcp) {
555      sendto(dc->d_socket, (const char*)&*packet.begin(), packet.size(), 0, (struct sockaddr *)(&dc->d_remote), dc->d_remote.getSocklen());
556    }
557    else {
558      char buf[2];
559      buf[0]=packet.size()/256;
560      buf[1]=packet.size()%256;
561
562      Utility::iovec iov[2];
563
564      iov[0].iov_base=(void*)buf;              iov[0].iov_len=2;
565      iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
566
567      int ret=Utility::writev(dc->d_socket, iov, 2);
568      bool hadError=true;
569
570      if(ret == 0) 
571        L<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
572      else if(ret < 0 ) 
573        L<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
574      else if((unsigned int)ret != 2 + packet.size())
575        L<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<ret<<")"<<endl;
576      else
577        hadError=false;
578     
579      // update tcp connection status, either by closing or moving to 'BYTE0'
580
581      if(hadError) {
582        g_fdm->removeReadFD(dc->d_socket);
583        TCPConnection::closeAndCleanup(dc->d_socket, dc->d_remote);
584      }
585      else {
586        TCPConnection tc;
587        tc.fd=dc->d_socket;
588        tc.state=TCPConnection::BYTE0;
589        tc.remote=dc->d_remote;
590        Utility::gettimeofday(&g_now, 0); // needs to be updated
591        tc.startTime=g_now.tv_sec;
592        g_fdm->addReadFD(tc.fd, handleRunningTCPQuestion, tc);
593        g_fdm->setReadTTD(tc.fd, g_now, g_tcpTimeout);
594      }
595    }
596
597    if(!g_quiet) {
598      L<<Logger::Error<<"["<<MT->getTid()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
599      L<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
600        sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<<res<<endl;
601    }
602   
603    sr.d_outqueries ? RC.cacheMisses++ : RC.cacheHits++; 
604    float spent=makeFloat(sr.d_now-dc->d_now);
605    if(spent < 0.001)
606      g_stats.answers0_1++;
607    else if(spent < 0.010)
608      g_stats.answers1_10++;
609    else if(spent < 0.1)
610      g_stats.answers10_100++;
611    else if(spent < 1.0)
612      g_stats.answers100_1000++;
613    else
614      g_stats.answersSlow++;
615
616    uint64_t newLat=(uint64_t)(spent*1000000);
617    if(newLat < 1000000)  // outliers of several minutes exist..
618      g_stats.avgLatencyUsec=(uint64_t)((1-0.0001)*g_stats.avgLatencyUsec + 0.0001*newLat);
619    delete dc;
620  }
621  catch(AhuException &ae) {
622    L<<Logger::Error<<"startDoResolve problem: "<<ae.reason<<endl;
623  }
624  catch(MOADNSException& e) {
625    L<<Logger::Error<<"DNS parser error: "<<dc->d_mdp.d_qname<<", "<<e.what()<<endl;
626  }
627  catch(exception& e) {
628    L<<Logger::Error<<"STL error: "<<e.what()<<endl;
629  }
630  catch(...) {
631    L<<Logger::Error<<"Any other exception in a resolver context"<<endl;
632  }
633}
634
635RecursorControlChannel s_rcc;
636
637void makeControlChannelSocket()
638{
639  string sockname=::arg()["socket-dir"]+"/pdns_recursor.controlsocket";
640  if(::arg().mustDo("fork")) {
641    sockname+="."+lexical_cast<string>(Utility::getpid());
642    L<<Logger::Warning<<"Forked control socket name: "<<sockname<<endl;
643  }
644  s_rcc.listen(sockname);
645}
646
647void handleRunningTCPQuestion(int fd, boost::any& var)
648{
649  TCPConnection* conn=any_cast<TCPConnection>(&var);
650
651  if(conn->state==TCPConnection::BYTE0) {
652    int bytes=recv(conn->fd, conn->data, 2, 0);
653    if(bytes==1)
654      conn->state=TCPConnection::BYTE1;
655    if(bytes==2) { 
656      conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
657      conn->bytesread=0;
658      conn->state=TCPConnection::GETQUESTION;
659    }
660    if(!bytes || bytes < 0) {
661      TCPConnection tmp(*conn); 
662      g_fdm->removeReadFD(fd);
663      tmp.closeAndCleanup();
664      return;
665    }
666  }
667  else if(conn->state==TCPConnection::BYTE1) {
668    int bytes=recv(conn->fd, conn->data+1, 1, 0);
669    if(bytes==1) {
670      conn->state=TCPConnection::GETQUESTION;
671      conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
672      conn->bytesread=0;
673    }
674    if(!bytes || bytes < 0) {
675      if(g_logCommonErrors)
676        L<<Logger::Error<<"TCP client "<< conn->remote.toString() <<" disconnected after first byte"<<endl;
677      TCPConnection tmp(*conn); 
678      g_fdm->removeReadFD(fd);
679      tmp.closeAndCleanup();  // conn loses validity here..
680      return;
681    }
682  }
683  else if(conn->state==TCPConnection::GETQUESTION) {
684    int bytes=recv(conn->fd, conn->data + conn->bytesread, conn->qlen - conn->bytesread, 0);
685    if(!bytes || bytes < 0) {
686      L<<Logger::Error<<"TCP client "<< conn->remote.toString() <<" disconnected while reading question body"<<endl;
687      TCPConnection tmp(*conn);
688      g_fdm->removeReadFD(fd);
689      tmp.closeAndCleanup();  // conn loses validity here..
690
691      return;
692    }
693    conn->bytesread+=bytes;
694    if(conn->bytesread==conn->qlen) {
695      TCPConnection tconn(*conn); 
696      g_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
697
698      DNSComboWriter* dc=0;
699      try {
700        dc=new DNSComboWriter(tconn.data, tconn.qlen, g_now);
701      }
702      catch(MOADNSException &mde) {
703        g_stats.clientParseError++; 
704        if(g_logCommonErrors)
705          L<<Logger::Error<<"Unable to parse packet from TCP client "<< tconn.remote.toString() <<endl;
706        tconn.closeAndCleanup();
707        return;
708      }
709     
710      dc->setSocket(tconn.fd);
711      dc->d_tcp=true;
712      dc->setRemote(&tconn.remote);
713      if(dc->d_mdp.d_header.qr) {
714        delete dc;
715        L<<Logger::Error<<"Ignoring answer on server socket!"<<endl;
716        tconn.closeAndCleanup();
717        return;
718      }
719      else {
720        ++g_stats.qcounter;
721        ++g_stats.tcpqcounter;
722        MT->makeThread(startDoResolve, dc); // deletes dc
723        return;
724      }
725    }
726  }
727}
728
729//! Handle new incoming TCP connection
730void handleNewTCPQuestion(int fd, boost::any& )
731{
732  ComboAddress addr;
733  socklen_t addrlen=sizeof(addr);
734  int newsock=(int)accept(fd, (struct sockaddr*)&addr, &addrlen);
735  if(newsock>0) {
736    g_stats.addRemote(addr);
737    if(g_allowFrom && !g_allowFrom->match(&addr)) {
738      if(!g_quiet) 
739        L<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
740
741      g_stats.unauthorizedTCP++;
742      Utility::closesocket(newsock);
743      return;
744    }
745   
746    if(g_maxTCPPerClient && g_tcpClientCounts.count(addr) && g_tcpClientCounts[addr] >= g_maxTCPPerClient) {
747      g_stats.tcpClientOverflow++;
748      Utility::closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
749      return;
750    }
751    g_tcpClientCounts[addr]++;
752    Utility::setNonBlocking(newsock);
753    TCPConnection tc;
754    tc.fd=newsock;
755    tc.state=TCPConnection::BYTE0;
756    tc.remote=addr;
757    tc.startTime=g_now.tv_sec;
758    TCPConnection::s_currentConnections++;
759    g_fdm->addReadFD(tc.fd, handleRunningTCPQuestion, tc);
760
761    struct timeval now;
762    Utility::gettimeofday(&now, 0);
763    g_fdm->setReadTTD(tc.fd, now, g_tcpTimeout);
764  }
765}
766 
767void handleNewUDPQuestion(int fd, boost::any& var)
768{
769  int len;
770  char data[1500];
771  ComboAddress fromaddr;
772  socklen_t addrlen=sizeof(fromaddr);
773
774  if((len=recvfrom(fd, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen)) >= 0) {
775    g_stats.addRemote(fromaddr);
776    if(g_allowFrom && !g_allowFrom->match(&fromaddr)) {
777      if(!g_quiet) 
778        L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
779
780      g_stats.unauthorizedUDP++;
781      return;
782    }
783    try {
784      DNSComboWriter* dc = new DNSComboWriter(data, len, g_now);
785      dc->setRemote(&fromaddr);
786     
787      if(dc->d_mdp.d_header.qr) {
788        if(g_logCommonErrors)
789          L<<Logger::Error<<"Ignoring answer from "<<dc->getRemote()<<" on server socket!"<<endl;
790        delete dc;
791      }
792      else {
793        ++g_stats.qcounter;
794        dc->setSocket(fd);
795        dc->d_tcp=false;
796        MT->makeThread(startDoResolve, (void*) dc); // deletes dc
797      }
798    }
799    catch(MOADNSException& mde) {
800      g_stats.clientParseError++; 
801      if(g_logCommonErrors)
802        L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
803    }
804  }
805}
806
807typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
808deferredAdd_t deferredAdd;
809
810void makeTCPServerSockets()
811{
812  int fd;
813  vector<string>locals;
814  stringtok(locals,::arg()["local-address"]," ,");
815
816  if(locals.empty())
817    throw AhuException("No local address specified");
818 
819  for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
820    ServiceTuple st;
821    st.port=::arg().asNum("local-port");
822    parseService(*i, st);
823   
824    ComboAddress sin;
825
826    memset((char *)&sin,0, sizeof(sin));
827    sin.sin4.sin_family = AF_INET;
828    if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
829      sin.sin6.sin6_family = AF_INET6;
830      if(Utility::inet_pton(AF_INET6, st.host.c_str(), &sin.sin6.sin6_addr) <= 0)
831        throw AhuException("Unable to resolve local address for TCP server on '"+ st.host +"'"); 
832    }
833
834    fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
835    if(fd<0) 
836      throw AhuException("Making a TCP server socket for resolver: "+stringerror());
837
838    int tmp=1;
839    if(setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&tmp,sizeof tmp)<0) {
840      L<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
841      exit(1);
842    }
843   
844#ifdef TCP_DEFER_ACCEPT
845    if(setsockopt(fd, SOL_TCP,TCP_DEFER_ACCEPT,(char*)&tmp,sizeof tmp) >= 0) {
846      if(i==locals.begin())
847        L<<Logger::Error<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
848    }
849#endif
850
851    sin.sin4.sin_port = htons(st.port);
852    int socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
853    if (::bind(fd, (struct sockaddr *)&sin, socklen )<0) 
854      throw AhuException("Binding TCP server socket for "+ st.host +": "+stringerror());
855   
856    Utility::setNonBlocking(fd);
857    setSendBuffer(fd, 65000);
858    listen(fd, 128);
859    deferredAdd.push_back(make_pair(fd, handleNewTCPQuestion));
860    g_tcpListenSockets.push_back(fd);
861
862    if(sin.sin4.sin_family == AF_INET) 
863      L<<Logger::Error<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
864    else
865      L<<Logger::Error<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
866  }
867}
868
869void makeUDPServerSockets()
870{
871  vector<string>locals;
872  stringtok(locals,::arg()["local-address"]," ,");
873
874  if(locals.empty())
875    throw AhuException("No local address specified");
876 
877  if(::arg()["local-address"]=="0.0.0.0") {
878    L<<Logger::Warning<<"It is advised to bind to explicit addresses with the --local-address option"<<endl;
879  }
880
881  for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
882    ServiceTuple st;
883    st.port=::arg().asNum("local-port");
884    parseService(*i, st);
885
886    ComboAddress sin;
887
888    memset(&sin, 0, sizeof(sin));
889    sin.sin4.sin_family = AF_INET;
890    if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
891      sin.sin6.sin6_family = AF_INET6;
892      if(Utility::inet_pton(AF_INET6, st.host.c_str(), &sin.sin6.sin6_addr) <= 0)
893        throw AhuException("Unable to resolve local address for UDP server on '"+ st.host +"'"); 
894    }
895   
896    int fd=socket(sin.sin4.sin_family, SOCK_DGRAM,0);
897    if(fd < 0) {
898      throw AhuException("Making a UDP server socket for resolver: "+netstringerror());
899    }
900
901    setReceiveBuffer(fd, 200000);
902    sin.sin4.sin_port = htons(st.port);
903
904    int socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
905    if (::bind(fd, (struct sockaddr *)&sin, socklen)<0) 
906      throw AhuException("Resolver binding to server socket on port "+ lexical_cast<string>(st.port) +" for "+ st.host+": "+stringerror());
907   
908    Utility::setNonBlocking(fd);
909    //    g_fdm->addReadFD(fd, handleNewUDPQuestion);
910    deferredAdd.push_back(make_pair(fd, handleNewUDPQuestion));
911
912    if(sin.sin4.sin_family == AF_INET) 
913      L<<Logger::Error<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
914    else
915      L<<Logger::Error<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
916  }
917}
918
919
920#ifndef WIN32
921void daemonize(void)
922{
923  if(fork())
924    exit(0); // bye bye
925 
926  setsid(); 
927
928  int i=open("/dev/null",O_RDWR); /* open stdin */
929  if(i < 0) 
930    L<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
931  else {
932    dup2(i,0); /* stdin */
933    dup2(i,1); /* stderr */
934    dup2(i,2); /* stderr */
935    close(i);
936  }
937}
938#endif
939
940uint64_t counter;
941bool statsWanted;
942
943
944void usr1Handler(int)
945{
946  statsWanted=true;
947}
948
949
950
951void usr2Handler(int)
952{
953  SyncRes::setLog(true);
954  g_quiet=false;
955  ::arg().set("quiet")="no";
956
957}
958
959void doStats(void)
960{
961  if(g_stats.qcounter && (RC.cacheHits + RC.cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
962    L<<Logger::Error<<"stats: "<<g_stats.qcounter<<" questions, "<<RC.size()<<" cache entries, "<<SyncRes::s_negcache.size()<<" negative entries, "
963     <<(int)((RC.cacheHits*100.0)/(RC.cacheHits+RC.cacheMisses))<<"% cache hits"<<endl;
964    L<<Logger::Error<<"stats: throttle map: "<<SyncRes::s_throttle.size()<<", ns speeds: "
965     <<SyncRes::s_nsSpeeds.size()<<endl; // ", bytes: "<<RC.bytes()<<endl;
966    L<<Logger::Error<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
967    L<<Logger::Error<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
968     <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
969    L<<Logger::Error<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<MT->numProcesses()<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
970  }
971  else if(statsWanted) 
972    L<<Logger::Error<<"stats: no stats yet!"<<endl;
973
974  statsWanted=false;
975}
976
977static void houseKeeping(void *)
978try
979{
980  static time_t last_stat, last_rootupdate, last_prune;
981  struct timeval now;
982  Utility::gettimeofday(&now, 0);
983
984  if(now.tv_sec - last_prune > 300) { 
985    DTime dt;
986    dt.setTimeval(now);
987    RC.doPrune();
988   
989    typedef SyncRes::negcache_t::nth_index<1>::type negcache_by_ttd_index_t;
990    negcache_by_ttd_index_t& ttdindex=boost::multi_index::get<1>(SyncRes::s_negcache);
991
992    negcache_by_ttd_index_t::iterator i=ttdindex.lower_bound(now.tv_sec);
993    ttdindex.erase(ttdindex.begin(), i);
994
995    time_t limit=now.tv_sec-300;
996    for(SyncRes::nsspeeds_t::iterator i = SyncRes::s_nsSpeeds.begin() ; i!= SyncRes::s_nsSpeeds.end(); )
997      if(i->second.stale(limit))
998        SyncRes::s_nsSpeeds.erase(i++);
999      else
1000        ++i;
1001
1002    //   cerr<<"Pruned "<<pruned<<" records, left "<<SyncRes::s_negcache.size()<<"\n";
1003//    cout<<"Prune took "<<dt.udiff()<<"usec\n";
1004    last_prune=time(0);
1005  }
1006  if(now.tv_sec - last_stat>1800) { 
1007    doStats();
1008    last_stat=time(0);
1009  }
1010  if(now.tv_sec - last_rootupdate > 7200) {
1011    SyncRes sr(now);
1012    vector<DNSResourceRecord> ret;
1013
1014    sr.setNoCache();
1015    int res=sr.beginResolve(".", QType(QType::NS), 1, ret);
1016    if(!res) {
1017      L<<Logger::Error<<"Refreshed . records"<<endl;
1018      last_rootupdate=now.tv_sec;
1019    }
1020    else
1021      L<<Logger::Error<<"Failed to update . records, RCODE="<<res<<endl;
1022  }
1023}
1024catch(AhuException& ae)
1025{
1026  L<<Logger::Error<<"Fatal error: "<<ae.reason<<endl;
1027  throw;
1028}
1029;
1030
/** Extract the (uncompressed) question name and type from a raw DNS packet.
 *
 *  \param packet start of the packet, i.e. of the 12 byte DNS header
 *  \param len    number of valid bytes in packet
 *  \param type   receives the 16 bit question type, or 0 when the packet ends
 *                before the type field or the name is not properly terminated
 *  \return the question name as dot-terminated labels, "." when no label could
 *          be read. A name cut short by the end of the packet yields the
 *          labels that were completely present.
 *
 *  No byte at or beyond packet+len is ever read.
 */
std::string questionExpand(const char* packet, uint16_t len, uint16_t& type)
{
  static const uint16_t headerLen=12;

  type=0;
  if(!packet || len <= headerLen)  // nothing after the header: no name, no type
    return ".";

  const unsigned char* const end=(const unsigned char*)packet+len;
  const unsigned char* pos=(const unsigned char*)packet+headerLen;

  std::string ret;
  ret.reserve(len-headerLen);

  bool terminated=false;  // did we see the zero-length root label?
  while(pos < end) {
    const unsigned char labellen=*pos++;
    if(!labellen) {
      terminated=true;
      break;
    }
    if(labellen > end - pos)  // label claims more bytes than the packet holds
      break;
    ret.append((const char*)pos, labellen);
    ret.append(1,'.');
    pos+=labellen;
  }

  if(ret.empty())
    ret=".";

  // the type is only meaningful directly after a complete name
  if(terminated && end - pos >= 2)
    type=(*pos)*256 + *(pos+1);

  return ret;
}
1054
1055
1056void handleRCC(int fd, boost::any& var)
1057{
1058  string remote;
1059  string msg=s_rcc.recv(&remote);
1060  RecursorControlParser rcp;
1061  RecursorControlParser::func_t* command;
1062  string answer=rcp.getAnswer(msg, &command);
1063  try {
1064    s_rcc.send(answer, &remote);
1065    command();
1066  }
1067  catch(exception& e) {
1068    L<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
1069  }
1070  catch(AhuException& ae) {
1071    L<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
1072  }
1073}
1074
1075void handleTCPClientReadable(int fd, boost::any& var)
1076{
1077  PacketID* pident=any_cast<PacketID>(&var);
1078  //  cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
1079
1080  shared_array<char> buffer(new char[pident->inNeeded]);
1081
1082  int ret=recv(fd, buffer.get(), pident->inNeeded,0);
1083  if(ret > 0) {
1084    pident->inMSG.append(&buffer[0], &buffer[ret]);
1085    pident->inNeeded-=ret;
1086    if(!pident->inNeeded) {
1087      //      cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
1088      PacketID pid=*pident;
1089      string msg=pident->inMSG;
1090     
1091      g_fdm->removeReadFD(fd);
1092      MT->sendEvent(pid, &msg); 
1093    }
1094    else {
1095      //      cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
1096    }
1097  }
1098  else {
1099    PacketID tmp=*pident;
1100    g_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
1101    string empty;
1102    MT->sendEvent(tmp, &empty); // this conveys error status
1103  }
1104}
1105
1106void handleTCPClientWritable(int fd, boost::any& var)
1107{
1108  PacketID* pid=any_cast<PacketID>(&var);
1109 
1110  int ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
1111  if(ret > 0) {
1112    pid->outPos+=ret;
1113    if(pid->outPos==pid->outMSG.size()) {
1114      PacketID tmp=*pid;
1115      g_fdm->removeWriteFD(fd);
1116      MT->sendEvent(tmp, &tmp.outMSG);  // send back what we sent to convey everything is ok
1117    }
1118  }
1119  else {  // error or EOF
1120    PacketID tmp(*pid);
1121    g_fdm->removeWriteFD(fd);
1122    string sent;
1123    MT->sendEvent(tmp, &sent);         // we convey error status by sending empty string
1124  }
1125}
1126
1127// resend event to everybody chained onto it
1128void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
1129{
1130  if(iter->key.chain.empty())
1131    return;
1132
1133  for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
1134    resend.fd=-1;
1135    resend.id=*i;
1136    MT->sendEvent(resend, &content);
1137    g_stats.chainResends++;
1138    //    cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<": "<< res <<endl;
1139  }
1140}
1141
1142void handleUDPServerResponse(int fd, boost::any& var)
1143{
1144  PacketID pid=any_cast<PacketID>(var);
1145  int len;
1146  char data[1500];
1147  ComboAddress fromaddr;
1148  socklen_t addrlen=sizeof(fromaddr);
1149
1150  len=recvfrom(fd, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen);
1151
1152  if(len < (int)sizeof(dnsheader)) {
1153    if(len < 0)
1154      ; //      cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
1155    else {
1156      g_stats.serverParseError++; 
1157      if(g_logCommonErrors)
1158        L<<Logger::Error<<"Unable to parse packet from remote UDP server "<< sockAddrToString((struct sockaddr_in*) &fromaddr) <<
1159          ": packet smalller than DNS header"<<endl;
1160    }
1161
1162    g_udpclientsocks.returnSocket(fd);
1163    string empty;
1164
1165    MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
1166    if(iter != MT->d_waiters.end()) 
1167      doResends(iter, pid, empty);
1168   
1169    MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
1170    return;
1171  } 
1172
1173  dnsheader dh;
1174  memcpy(&dh, data, sizeof(dh));
1175 
1176  if(!dh.qdcount) // UPC, Nominum?
1177    return;
1178 
1179  if(dh.qr) {
1180    PacketID pident;
1181    pident.remote=fromaddr;
1182    pident.id=dh.id;
1183    pident.fd=fd;
1184    pident.domain=questionExpand(data, len, pident.type); // don't copy this from above - we need to do the actual read
1185    string packet;
1186    packet.assign(data, len);
1187
1188    MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
1189    if(iter != MT->d_waiters.end()) {
1190      doResends(iter, pident, packet);
1191    }
1192
1193    if(!MT->sendEvent(pident, &packet)) {
1194//      if(g_logCommonErrors)
1195//        L<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toString()<<": "<<pident.type<<endl;
1196      g_stats.unexpectedCount++;
1197     
1198      for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
1199        if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote &&  mthread->key.type == pident.type &&
1200           !Utility::strcasecmp(pident.domain.c_str(), mthread->key.domain.c_str())) {
1201          mthread->key.nearMisses++;
1202        }
1203      }
1204    }
1205    else if(fd >= 0)
1206      g_udpclientsocks.returnSocket(fd);
1207  }
1208  else
1209    L<<Logger::Warning<<"Ignoring question on outgoing socket from "<< sockAddrToString((struct sockaddr_in*) &fromaddr)  <<endl;
1210}
1211
1212FDMultiplexer* getMultiplexer()
1213{
1214  FDMultiplexer* ret;
1215  for(FDMultiplexer::FDMultiplexermap_t::const_iterator i = FDMultiplexer::getMultiplexerMap().begin();
1216      i != FDMultiplexer::getMultiplexerMap().end(); ++i) {
1217    try {
1218      ret=i->second();
1219      L<<Logger::Error<<"Enabled '"<<ret->getName()<<"' multiplexer"<<endl;
1220      return ret;
1221    }
1222    catch(FDMultiplexerException &fe) {
1223      L<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
1224    }
1225    catch(...) {
1226      L<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
1227    }
1228  }
1229  L<<Logger::Error<<"No working multiplexer found!"<<endl;
1230  exit(1);
1231}
1232
1233static void makeNameToIPZone(const string& hostname, const string& ip)
1234{
1235  SyncRes::AuthDomain ad;
1236  DNSResourceRecord rr;
1237  rr.qname=toCanonic("", hostname);
1238  rr.d_place=DNSResourceRecord::ANSWER;
1239  rr.ttl=86400;
1240  rr.qtype=QType::SOA;
1241  rr.content="localhost. root 1 604800 86400 2419200 604800";
1242 
1243  ad.d_records.insert(rr);
1244
1245  rr.qtype=QType::NS;
1246  rr.content="localhost.";
1247
1248  ad.d_records.insert(rr);
1249 
1250  rr.qtype=QType::A;
1251  rr.content=ip;
1252  ad.d_records.insert(rr);
1253 
1254  if(SyncRes::s_domainmap.count(rr.qname)) {
1255    L<<Logger::Warning<<"Hosts file will not overwrite zone '"<<rr.qname<<"' already loaded"<<endl;
1256  }
1257  else {
1258    L<<Logger::Warning<<"Inserting forward zone '"<<rr.qname<<"' based on hosts file"<<endl;
1259    SyncRes::s_domainmap[rr.qname]=ad;
1260  }
1261}
1262
1263//! parts[0] must be an IP address, the rest must be host names
1264static void makeIPToNamesZone(const vector<string>& parts) 
1265{
1266  string address=parts[0];
1267  vector<string> ipparts;
1268  stringtok(ipparts, address,".");
1269 
1270  SyncRes::AuthDomain ad;
1271  DNSResourceRecord rr;
1272  for(int n=ipparts.size()-1; n>=0 ; --n) {
1273    rr.qname.append(ipparts[n]);
1274    rr.qname.append(1,'.');
1275  }
1276  rr.qname.append("in-addr.arpa.");
1277
1278  rr.d_place=DNSResourceRecord::ANSWER;
1279  rr.ttl=86400;
1280  rr.qtype=QType::SOA;
1281  rr.content="localhost. root. 1 604800 86400 2419200 604800";
1282 
1283  ad.d_records.insert(rr);
1284
1285  rr.qtype=QType::NS;
1286  rr.content="localhost.";
1287
1288  ad.d_records.insert(rr);
1289  rr.qtype=QType::PTR;
1290
1291  if(ipparts.size()==4)  // otherwise this is a partial zone
1292    for(unsigned int n=1; n < parts.size(); ++n) {
1293      rr.content=toCanonic("", parts[n]);
1294      ad.d_records.insert(rr);
1295    }
1296
1297  if(SyncRes::s_domainmap.count(rr.qname)) {
1298    L<<Logger::Warning<<"Will not overwrite zone '"<<rr.qname<<"' already loaded"<<endl;
1299  }
1300  else {
1301    if(ipparts.size()==4)
1302      L<<Logger::Warning<<"Inserting reverse zone '"<<rr.qname<<"' based on hosts file"<<endl;
1303    SyncRes::s_domainmap[rr.qname]=ad;
1304  }
1305}
1306
1307
1308void parseAuthAndForwards();
1309
1310string reloadAuthAndForwards()
1311{
1312  SyncRes::domainmap_t original=SyncRes::s_domainmap;
1313 
1314  try {
1315    L<<Logger::Warning<<"Reloading zones, purging data from cache"<<endl;
1316 
1317    for(SyncRes::domainmap_t::const_iterator i = SyncRes::s_domainmap.begin(); i != SyncRes::s_domainmap.end(); ++i) {
1318      for(SyncRes::AuthDomain::records_t::const_iterator j = i->second.d_records.begin(); j != i->second.d_records.end(); ++j) 
1319        RC.doWipeCache(j->qname);
1320    }
1321
1322    string configname=::arg()["config-dir"]+"/recursor.conf";
1323    cleanSlashes(configname);
1324   
1325    if(!::arg().preParseFile(configname.c_str(), "forward-zones")) 
1326      L<<Logger::Warning<<"Unable to re-parse configuration file '"<<configname<<"'"<<endl;
1327   
1328    ::arg().preParseFile(configname.c_str(), "auth-zones");
1329    ::arg().preParseFile(configname.c_str(), "export-etc-hosts");
1330    ::arg().preParseFile(configname.c_str(), "serve-rfc1918");
1331   
1332    parseAuthAndForwards();
1333   
1334    // purge again - new zones need to blank out the cache
1335    for(SyncRes::domainmap_t::const_iterator i = SyncRes::s_domainmap.begin(); i != SyncRes::s_domainmap.end(); ++i) {
1336      for(SyncRes::AuthDomain::records_t::const_iterator j = i->second.d_records.begin(); j != i->second.d_records.end(); ++j) 
1337        RC.doWipeCache(j->qname);
1338    }
1339
1340    // this is pretty blunt
1341    SyncRes::s_negcache.clear(); 
1342    return "ok\n";
1343  }
1344  catch(exception& e) {
1345    L<<Logger::Error<<"Had error reloading zones, keeping original data: "<<e.what()<<endl;
1346  }
1347  catch(AhuException& ae) {
1348    L<<Logger::Error<<"Encountered error reloading zones, keeping original data: "<<ae.reason<<endl;
1349  }
1350  catch(...) {
1351    L<<Logger::Error<<"Encountered unknown error reloading zones, keeping original data"<<endl;
1352  }
1353  SyncRes::s_domainmap.swap(original);
1354  return "reloading failed, see log\n";
1355}
1356
1357void parseAuthAndForwards()
1358{
1359  SyncRes::s_domainmap.clear(); // this makes us idempotent
1360
1361  TXTRecordContent::report();
1362
1363  typedef vector<string> parts_t;
1364  parts_t parts; 
1365  for(int n=0; n < 2 ; ++n ) {
1366    parts.clear();
1367    stringtok(parts, ::arg()[n ? "forward-zones" : "auth-zones"], ",\t\n\r");
1368    for(parts_t::const_iterator iter = parts.begin(); iter != parts.end(); ++iter) {
1369      SyncRes::AuthDomain ad;
1370      pair<string,string> headers=splitField(*iter, '=');
1371      trim(headers.first);
1372      trim(headers.second);
1373      headers.first=toCanonic("", headers.first);
1374      if(n==0) {
1375        L<<Logger::Error<<"Parsing authoritative data for zone '"<<headers.first<<"' from file '"<<headers.second<<"'"<<endl;
1376        ZoneParserTNG zpt(headers.second, headers.first);
1377        DNSResourceRecord rr;
1378        while(zpt.get(rr)) {
1379          try {
1380            string tmp=DNSRR2String(rr);
1381            rr=String2DNSRR(rr.qname, rr.qtype, tmp, 3600);
1382          }
1383          catch(exception &e) {
1384            throw AhuException("Error parsing record '"+rr.qname+"' of type "+rr.qtype.getName()+" in zone '"+headers.first+"' from file '"+headers.second+"': "+e.what());
1385          }
1386          catch(...) {
1387            throw AhuException("Error parsing record '"+rr.qname+"' of type "+rr.qtype.getName()+" in zone '"+headers.first+"' from file '"+headers.second+"'");
1388          }
1389
1390          ad.d_records.insert(rr);
1391
1392        }
1393      }
1394      else {
1395        L<<Logger::Error<<"Redirecting queries for zone '"<<headers.first<<"' to IP '"<<headers.second<<"'"<<endl;
1396        ad.d_server=headers.second;
1397      }
1398     
1399      SyncRes::s_domainmap[headers.first]=ad;
1400    }
1401  }
1402 
1403  if(::arg().mustDo("export-etc-hosts")) {
1404    string line;
1405    string fname;
1406   
1407    ifstream ifs("/etc/hosts");
1408    if(!ifs) {
1409      L<<Logger::Warning<<"Could not open /etc/hosts for reading"<<endl;
1410      return;
1411    }
1412   
1413    string::size_type pos;
1414    while(getline(ifs,line)) {
1415      pos=line.find('#');
1416      if(pos!=string::npos)
1417        line.resize(pos);
1418      trim(line);
1419      if(line.empty())
1420        continue;
1421      parts.clear();
1422      stringtok(parts, line, "\t\r\n ");
1423      if(parts[0].find(':')!=string::npos)
1424        continue;
1425     
1426      for(unsigned int n=1; n < parts.size(); ++n)
1427        makeNameToIPZone(parts[n], parts[0]);
1428      makeIPToNamesZone(parts);
1429    }
1430  }
1431  if(::arg().mustDo("serve-rfc1918")) {
1432    L<<Logger::Warning<<"Inserting rfc 1918 private space zones"<<endl;
1433    parts.clear();
1434    parts.push_back("127");
1435    makeIPToNamesZone(parts);
1436    parts[0]="10";
1437    makeIPToNamesZone(parts);
1438
1439    parts[0]="192.168";
1440    makeIPToNamesZone(parts);
1441    for(int n=16; n < 32; n++) {
1442      parts[0]="172."+lexical_cast<string>(n);
1443      makeIPToNamesZone(parts);
1444    }
1445  }
1446}
1447
1448int serviceMain(int argc, char*argv[])
1449{
1450  L.setName("pdns_recursor");
1451
1452  L.setLoglevel((Logger::Urgency)(6)); // info and up
1453
1454  if(!::arg()["logging-facility"].empty()) {
1455    boost::optional<int> val=logFacilityToLOG(::arg().asNum("logging-facility") );
1456    if(val)
1457      theL().setFacility(*val);
1458    else
1459      L<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
1460  }
1461
1462  L<<Logger::Warning<<"PowerDNS recursor "<<VERSION<<" (C) 2001-2006 PowerDNS.COM BV ("<<__DATE__", "__TIME__;
1463#ifdef __GNUC__
1464  L<<", gcc "__VERSION__;
1465#endif // add other compilers here
1466#ifdef _MSC_VER
1467  L<<", MSVC "<<_MSC_VER;
1468#endif
1469  L<<") starting up"<<endl;
1470 
1471  L<<Logger::Warning<<"PowerDNS comes with ABSOLUTELY NO WARRANTY. "
1472    "This is free software, and you are welcome to redistribute it "
1473    "according to the terms of the GPL version 2."<<endl;
1474 
1475  L<<Logger::Warning<<"Operating in "<<(sizeof(unsigned long)*8) <<" bits mode"<<endl;
1476 
1477  if(!::arg()["allow-from"].empty()) {
1478    g_allowFrom=new NetmaskGroup;
1479    vector<string> ips;
1480    stringtok(ips, ::arg()["allow-from"], ", ");
1481    L<<Logger::Warning<<"Only allowing queries from: ";
1482    for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
1483      g_allowFrom->addMask(*i);
1484      if(i!=ips.begin())
1485        L<<Logger::Warning<<", ";
1486      L<<Logger::Warning<<*i;
1487    }
1488    L<<Logger::Warning<<endl;
1489  }
1490  else if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
1491    L<<Logger::Error<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
1492 
1493  if(!::arg()["dont-query"].empty()) {
1494    g_dontQuery=new NetmaskGroup;
1495    vector<string> ips;
1496    stringtok(ips, ::arg()["dont-query"], ", ");
1497    L<<Logger::Warning<<"Will not send queries to: ";
1498    for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
1499      g_dontQuery->addMask(*i);
1500      if(i!=ips.begin())
1501        L<<Logger::Warning<<", ";
1502      L<<Logger::Warning<<*i;
1503    }
1504    L<<Logger::Warning<<endl;
1505  }
1506
1507  g_quiet=::arg().mustDo("quiet");
1508  if(::arg().mustDo("trace")) {
1509    SyncRes::setLog(true);
1510    ::arg().set("quiet")="no";
1511    g_quiet=false;
1512  }
1513
1514  RC.d_followRFC2181=::arg().mustDo("auth-can-lower-ttl");
1515 
1516  if(!::arg()["query-local-address6"].empty()) {
1517    SyncRes::s_doIPv6=true;
1518    L<<Logger::Error<<"Enabling IPv6 transport for outgoing queries"<<endl;
1519  }
1520 
1521  SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
1522  SyncRes::s_serverID=::arg()["server-id"];
1523  if(SyncRes::s_serverID.empty()) {
1524    char tmp[128];
1525    gethostname(tmp, sizeof(tmp)-1);
1526    SyncRes::s_serverID=tmp;
1527  }
1528 
1529 
1530  parseAuthAndForwards();
1531 
1532  g_stats.remotes.resize(::arg().asNum("remotes-ringbuffer-entries"));
1533  if(!g_stats.remotes.empty())
1534    memset(&g_stats.remotes[0], 0, g_stats.remotes.size() * sizeof(RecursorStats::remotes_t::value_type));
1535  g_logCommonErrors=::arg().mustDo("log-common-errors");
1536 
1537  makeUDPServerSockets();
1538  makeTCPServerSockets();
1539 
1540#ifndef WIN32
1541  if(::arg().mustDo("fork")) {
1542    fork();
1543    L<<Logger::Warning<<"This is forked pid "<<getpid()<<endl;
1544  }
1545#endif
1546 
1547  MT=new MTasker<PacketID,string>(::arg().asNum("stack-size"));
1548  makeControlChannelSocket();       
1549  PacketID pident;
1550  primeHints();   
1551  L<<Logger::Warning<<"Done priming cache with root hints"<<endl;
1552#ifndef WIN32
1553  if(::arg().mustDo("daemon")) {
1554    L<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
1555    L.toConsole(Logger::Critical);
1556    daemonize();
1557  }
1558  signal(SIGUSR1,usr1Handler);
1559  signal(SIGUSR2,usr2Handler);
1560  signal(SIGPIPE,SIG_IGN);
1561  writePid();
1562#endif
1563  g_fdm=getMultiplexer();
1564 
1565  for(deferredAdd_t::const_iterator i=deferredAdd.begin(); i!=deferredAdd.end(); ++i) 
1566    g_fdm->addReadFD(i->first, i->second);
1567 
1568  int newgid=0;
1569  if(!::arg()["setgid"].empty())
1570    newgid=Utility::makeGidNumeric(::arg()["setgid"]);
1571  int newuid=0;
1572  if(!::arg()["setuid"].empty())
1573    newuid=Utility::makeUidNumeric(::arg()["setuid"]);
1574 
1575#ifndef WIN32
1576  if (!::arg()["chroot"].empty()) {
1577    if (chroot(::arg()["chroot"].c_str())<0) {
1578      L<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
1579      exit(1);
1580    }
1581  }
1582 
1583  Utility::dropPrivs(newuid, newgid);
1584  g_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
1585#endif
1586 
1587  counter=0;
1588  unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
1589  g_tcpTimeout=::arg().asNum("client-tcp-timeout");
1590 
1591  g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
1592 
1593 
1594  bool listenOnTCP(true);
1595 
1596  for(;;) {
1597    while(MT->schedule(g_now.tv_sec)); // housekeeping, let threads do their thing
1598     
1599    if(!(counter%500)) {
1600      MT->makeThread(houseKeeping,0);
1601    }
1602
1603    if(!(counter%55)) {
1604      typedef vector<pair<int, boost::any> > expired_t;
1605      expired_t expired=g_fdm->getTimeouts(g_now);
1606       
1607      for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
1608        TCPConnection conn=any_cast<TCPConnection>(i->second);
1609        if(g_logCommonErrors)
1610          L<<Logger::Warning<<"Timeout from remote TCP client "<< conn.remote.toString() <<endl;
1611        g_fdm->removeReadFD(i->first);
1612        conn.closeAndCleanup();
1613      }
1614    }
1615     
1616    counter++;
1617
1618    if(statsWanted) {
1619      doStats();
1620    }
1621
1622    Utility::gettimeofday(&g_now, 0);
1623    g_fdm->run(&g_now);
1624
1625    if(listenOnTCP) {
1626      if(TCPConnection::s_currentConnections > maxTcpClients) {  // shutdown
1627        for(g_tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
1628          g_fdm->removeReadFD(*i);
1629        listenOnTCP=false;
1630      }
1631    }
1632    else {
1633      if(TCPConnection::s_currentConnections <= maxTcpClients) {  // reenable
1634        for(g_tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
1635          g_fdm->addReadFD(*i, handleNewTCPQuestion);
1636        listenOnTCP=true;
1637      }
1638    }
1639  }
1640}
1641#ifdef WIN32
1642void doWindowsServiceArguments(RecursorService& recursor)
1643{
1644  if(::arg().mustDo( "register-service" )) {
1645    if ( !recursor.registerService( "The PowerDNS Recursor.", true )) {
1646      cerr << "Could not register service." << endl;
1647      exit( 99 );
1648    }
1649   
1650    exit( 0 );
1651  }
1652
1653  if ( ::arg().mustDo( "unregister-service" )) {
1654    recursor.unregisterService();
1655    exit( 0 );
1656  }
1657}
1658#endif
1659
1660int main(int argc, char **argv) 
1661{
1662  reportBasicTypes();
1663
1664  int ret = EXIT_SUCCESS;
1665#ifdef WIN32
1666  RecursorService service;
1667  WSADATA wsaData;
1668  if(WSAStartup( MAKEWORD( 2, 2 ), &wsaData )) {
1669    cerr<<"Unable to initialize winsock\n";
1670    exit(1);
1671  }
1672#endif // WIN32
1673
1674  try {
1675    Utility::srandom(time(0));
1676    ::arg().set("stack-size","stack size per mthread")="200000";
1677    ::arg().set("soa-minimum-ttl","Don't change")="0";
1678    ::arg().set("soa-serial-offset","Don't change")="0";
1679    ::arg().set("no-shuffle","Don't change")="off";
1680    ::arg().set("aaaa-additional-processing","turn on to do AAAA additional processing (slow)")="off";
1681    ::arg().set("local-port","port to listen on")="53";
1682    ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
1683    ::arg().set("trace","if we should output heaps of logging")="off";
1684    ::arg().set("daemon","Operate as a daemon")="yes";
1685    ::arg().set("log-common-errors","If we should log rather common errors")="yes";
1686    ::arg().set("chroot","switch to chroot jail")="";
1687    ::arg().set("setgid","If set, change group id to this gid for more security")="";
1688    ::arg().set("setuid","If set, change user id to this uid for more security")="";
1689#ifdef WIN32
1690    ::arg().set("quiet","Suppress logging of questions and answers")="off";
1691    ::arg().setSwitch( "register-service", "Register the service" )= "no";
1692    ::arg().setSwitch( "unregister-service", "Unregister the service" )= "no";
1693    ::arg().setSwitch( "ntservice", "Run as service" )= "no";
1694    ::arg().setSwitch( "use-ntlog", "Use the NT logging facilities" )= "yes"; 
1695    ::arg().setSwitch( "use-logfile", "Use a log file" )= "no"; 
1696    ::arg().setSwitch( "logfile", "Filename of the log file" )= "recursor.log"; 
1697#else
1698    ::arg().set("quiet","Suppress logging of questions and answers")="";
1699    ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
1700#endif
1701    ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
1702    ::arg().set("socket-dir","Where the controlsocket will live")=LOCALSTATEDIR;
1703    ::arg().set("delegation-only","Which domains we only accept delegations from")="";
1704    ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
1705    ::arg().set("query-local-address6","Source IPv6 address for sending queries")="";
1706    ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
1707    ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
1708    ::arg().set("hint-file", "If set, load root hints from this file")="";
1709    ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="0";
1710    ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
1711    ::arg().set("server-id", "Returned when queried for 'server.id' TXT, defaults to hostname")="";
1712    ::arg().set("remotes-ringbuffer-entries", "maximum number of packets to store statistics for")="0";
1713    ::arg().set("version-string", "string reported on version.pdns or version.bind")="PowerDNS Recursor "VERSION" $Id$";
1714    ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")="127.0.0.0/8, 10.0.0.0/8, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fe80::/10";
1715    ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")="127.0.0.0/8, 10.0.0.0/8, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fe80::/10";
1716    ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
1717    ::arg().set("fork", "If set, fork the daemon for possible double performance")="no";
1718    ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
1719    ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
1720    ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
1721    ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
1722    ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
1723    ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="";
1724    ::arg().set("auth-can-lower-ttl", "If we follow RFC 2181 to the letter, an authoritative server can lower the TTL of NS records")="off";
1725    ::arg().setSwitch( "ignore-rd-bit", "Assume each packet requires recursion, for compatability" )= "off"; 
1726
1727    ::arg().setCmd("help","Provide a helpful message");
1728    ::arg().setCmd("config","Output blank configuration");
1729    L.toConsole(Logger::Info);
1730    ::arg().laxParse(argc,argv); // do a lax parse
1731
1732    string configname=::arg()["config-dir"]+"/recursor.conf";
1733    cleanSlashes(configname);
1734
1735    if(!::arg().file(configname.c_str())) 
1736      L<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
1737
1738    ::arg().parse(argc,argv);
1739
1740    ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
1741
1742    if(::arg().mustDo("help")) {
1743      cerr<<"syntax:"<<endl<<endl;
1744      cerr<<::arg().helpstring(::arg()["help"])<<endl;
1745      exit(99);
1746    }
1747
1748    if(::arg().mustDo("config")) {
1749      cout<<::arg().configstring()<<endl;
1750      exit(0);
1751    }
1752
1753#ifndef WIN32
1754    serviceMain(argc, argv);
1755#else
1756    doWindowsServiceArguments(service);
1757        L.toNTLog();
1758    RecursorService::instance()->start( argc, argv, ::arg().mustDo( "ntservice" )); 
1759#endif
1760
1761  }
1762  catch(AhuException &ae) {
1763    L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
1764    ret=EXIT_FAILURE;
1765  }
1766  catch(exception &e) {
1767    L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
1768    ret=EXIT_FAILURE;
1769  }
1770  catch(...) {
1771    L<<Logger::Error<<"any other exception in main: "<<endl;
1772    ret=EXIT_FAILURE;
1773  }
1774 
1775#ifdef WIN32
1776  WSACleanup();
1777#endif // WIN32
1778
1779  return ret;
1780}
Note: See TracBrowser for help on using the browser.